# Welcome to the Happy Whale notebook!

# What is the problem?

**In this competition, we’ll develop a model to match individual whales and dolphins by unique—but often subtle—characteristics of their natural markings. We'll pay particular attention to dorsal fins and lateral body views in image sets from a multi-species dataset built by 28 research institutions. The best submissions will suggest photo-ID solutions that are fast and accurate.**

# Method of solving


1. Learn about the dataset
2. EDA & visualization
3. Data cleaning
4. Model selection
5. Prediction & submission

**If you like it, please upvote this notebook.**

# Importing Different Libraries

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import cv2
import os
from PIL import Image
from torchvision import transforms
from matplotlib.pyplot import imshow
from IPython.display import HTML
import torch
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from keras import layers
from keras.models import Sequential
from keras.preprocessing import image
from keras.layers import Input, Dense, Activation, Dropout
from keras.layers import Flatten, BatchNormalization, Conv2D
from keras.layers import MaxPooling2D, AveragePooling2D
from keras.applications.imagenet_utils import preprocess_input
from PIL import Image
from tqdm import tqdm
import random as rnd
import cv2
!pip install livelossplot
from livelossplot import PlotLossesKeras
%matplotlib inline

# Checking Working Directory

In [None]:
# Report the kernel's current working directory; the relative '../input/...'
# paths used later in the notebook are resolved against it.

cwd = os.getcwd()
print("Your current working directory is : " , cwd)

**Print Data Location**

In [None]:
# List the entries of the competition dataset folder.
print(os.listdir('../input/happy-whale-and-dolphin'))

# Loading data from kernel

In [None]:
# Absolute path of the folder holding the training images.
img_train_path = os.path.abspath('../input/happy-whale-and-dolphin/train_images')
# Training metadata; later cells read its 'image', 'species' and 'individual_id' columns.
trainedData = pd.read_csv('../input/happy-whale-and-dolphin/train.csv')
# Relative file path of each training image, built from its file name.
trainedData['path'] = '../input/happy-whale-and-dolphin/train_images/' + trainedData['image']
# Absolute paths of the test-image folder and of the training CSV.
img_test_path = os.path.abspath('../input/happy-whale-and-dolphin/test_images')
csv_train_path = os.path.abspath('../input/happy-whale-and-dolphin/train.csv')
# Bare expression: the cell displays the resolved CSV path.
csv_train_path

In [None]:
# `df` is loaded separately from `trainedData`, so the two frames are independent:
# changes made to one (e.g. the 'new_whale' filter on `df`, the species-label
# fixes on `trainedData`) do not affect the other.
df = pd.read_csv(csv_train_path)
df.head()

# Statistics of Datasets

In [None]:
# Number of rows in the training metadata, followed by its column names.
print('Train samples count: ', len(trainedData))
trainedData.columns

In [None]:
# Count the rows per species label once, report how many distinct labels exist,
# then display the per-label counts.
species_counts = trainedData['species'].value_counts()
print('Species Count: ', len(species_counts))
species_counts

# Data Visualization

**Number of images per species**

In [None]:
# One horizontal bar per species label, sized by its number of rows.
fig, ax = plt.subplots(figsize=(8, 8))
sns.countplot(data=trainedData, y='species', palette='crest', dodge=False, ax=ax)
plt.show()

In [None]:
#plt.figure(figsize = (15,12))
#for idx,i in enumerate(trainedData.species.unique()):
#    plt.subplot(4,7,idx+1)
#    df = trainedData[trainedData['species'] ==i].reset_index(drop = True)
  #  image_path = df.loc[rnd.randint(0, len(df))-1,'path']
#    img = Image.open(image_path)
#    img = img.resize((224,224))
#    plt.imshow(img)
 #   plt.axis('off')
#    plt.title(i)
#plt.tight_layout()
#plt.show()

**Visualization of Unique Species**

In [None]:
def plot_species(df, species_name, n_images=32, n_cols=8):
    """Show a gallery of randomly chosen images for one species.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'species' column and a 'path' column of image file paths.
    species_name : str
        Value of the 'species' column to select; also used as the figure title.
    n_images : int, default 32
        Maximum number of images to show. Fewer are shown when the species has
        fewer rows, so no image is ever displayed twice.
    n_cols : int, default 8
        Number of thumbnails per row.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``. When no row matches
        `species_name` a message is printed and nothing is drawn.
    """
    species_paths = df.loc[df['species'] == species_name, 'path'].to_numpy()
    n_shown = min(n_images, len(species_paths))
    if n_shown <= 0:
        # np.random.choice raises on an empty population; skip gracefully instead.
        print(f"No images to show for species '{species_name}'")
        return

    # replace=False: draw distinct images rather than repeating some of them.
    chosen_paths = np.random.choice(species_paths, n_shown, replace=False)

    # Size the grid to the number of thumbnails so no blank rows are reserved.
    n_cols = max(1, n_cols)
    n_rows = -(-n_shown // n_cols)  # ceiling division
    plt.figure(figsize=(12, 1.5 * n_rows + 0.5))
    plt.suptitle(species_name)
    for idx, image_path in enumerate(chosen_paths):
        plt.subplot(n_rows, n_cols, idx + 1)
        # The context manager closes the image file once the thumbnail exists.
        with Image.open(image_path) as img:
            plt.imshow(img.resize((224, 224)))
        plt.axis('off')
    plt.tight_layout()
    plt.show()

In [None]:
# Draw one gallery per distinct species label.
# NOTE(review): this cell runs before the species-label clean-up further down,
# so misspelled labels presumably get their own gallery — confirm that is intended.
for species in trainedData['species'].unique():
    plot_species(trainedData , species)

**Individual visualization**

In [None]:
def plot_individual(df, individual_id, n_images=24, n_cols=8):
    """Show a gallery of randomly chosen images for one individual animal.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an 'individual_id' column and a 'path' column of image
        file paths.
    individual_id : str
        Value of the 'individual_id' column to select; also used as the title.
    n_images : int, default 24
        Maximum number of images to show. Individuals with fewer images are
        shown in full, without repeating any image.
    n_cols : int, default 8
        Number of thumbnails per row.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``. When no row matches
        `individual_id` a message is printed and nothing is drawn.
    """
    individual_paths = df.loc[df['individual_id'] == individual_id, 'path'].to_numpy()
    n_shown = min(n_images, len(individual_paths))
    if n_shown <= 0:
        # np.random.choice raises on an empty population; skip gracefully instead.
        print(f"No images to show for individual '{individual_id}'")
        return

    # replace=False: an individual with a single photo is shown once instead of
    # having that photo repeated across the whole grid.
    chosen_paths = np.random.choice(individual_paths, n_shown, replace=False)

    # Size the grid to the number of thumbnails so no blank rows are reserved.
    n_cols = max(1, n_cols)
    n_rows = -(-n_shown // n_cols)  # ceiling division
    plt.figure(figsize=(12, 1.5 * n_rows + 0.5))
    plt.suptitle(individual_id)
    for idx, image_path in enumerate(chosen_paths):
        plt.subplot(n_rows, n_cols, idx + 1)
        # The context manager closes the image file once the thumbnail exists.
        with Image.open(image_path) as img:
            plt.imshow(img.resize((224, 224)))
        plt.axis('off')
    plt.tight_layout()
    plt.show()

In [None]:
# value_counts() sorts by frequency in descending order, so tail(10) selects
# the 10 individuals with the fewest rows; one gallery is drawn for each.
numShow = trainedData.individual_id.value_counts().tail(10)
for i in numShow.index:
    plot_individual(trainedData , i)

**Image Path Selecting**

In [None]:
# Prefix every image file name with the absolute training-image folder.
df['Image_path'] = df['image'].map(lambda whale: os.path.join(img_train_path, whale))
df.head()

In [None]:
# Draw 5 image paths at random (np.random.choice samples with replacement by
# default). No seed is set here, so the selection differs between runs.
full_path_random_whales = np.random.choice(df['Image_path'],5)
full_path_random_whales

In [None]:
%matplotlib inline
for whale in full_path_random_whales:
    img = Image.open(whale)
    plt.imshow(img)
    plt.show()

In [None]:
# Read the first sampled image with OpenCV, convert it from BGR to a single
# grayscale channel, and resize it to exactly 128x128 (aspect ratio is not
# preserved) using bicubic interpolation for a quick preview.
img = cv2.imread(full_path_random_whales[0])
img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
res = cv2.resize(img, dsize=(128, 128), interpolation=cv2.INTER_CUBIC)
plt.imshow(res,cmap='gray')
plt.show()

# Normalization of data

In [None]:
# Per-channel normalisation applied after ToTensor.
# NOTE(review): these look like the channel statistics commonly used with
# ImageNet-pretrained torchvision models — confirm they suit grayscale input.
normalize = transforms.Normalize(
   mean=[0.485, 0.456, 0.406],
   std=[0.229, 0.224, 0.225]
)
# Image -> tensor pipeline: grayscale replicated to 3 channels, resize, take a
# 128x128 centre crop, convert to a tensor, then normalise.
preprocess = transforms.Compose([
   transforms.Grayscale(num_output_channels=3),
   transforms.Resize(128),
   transforms.CenterCrop(128),
   transforms.ToTensor(),
   normalize
])
# Try the pipeline on the 5 randomly sampled images.
imgs = [Image.open(whale) for whale in full_path_random_whales]
imgs_tensor = [preprocess(whale) for whale in imgs]

In [None]:
# Shape of one preprocessed image; with the 3-channel grayscale and 128x128
# centre crop of `preprocess` this should be (3, 128, 128).
imgs_tensor[0].shape

In [None]:
# Show the first channel of the first preprocessed (normalised) image.
img = imgs_tensor[0]
plt.imshow(img[0],cmap='gray')
plt.show()

# Data Cleaning!

**Counting images per individual ID**

In [None]:
# The most frequent individual_id values with their row counts
# (value_counts sorts in descending order).
df.individual_id.value_counts().head()

In [None]:
# Boolean mask that is True for every row whose id is not the 'new_whale' label.
dolWhale = df['individual_id'].ne('new_whale')
# Keep only the rows selected by the mask, then re-check the most frequent ids.
df = df.loc[dolWhale]
df['individual_id'].value_counts().head()

**Null Checking**

In [None]:
# Number of missing values in each column of the training metadata.
trainedData.isna().sum()

**Cleaning the species labels and displaying the result**

In [None]:
print("Number of unique species : ", trainedData['species'].nunique())

# Misspelled or alternative labels mapped onto the species name they are merged into.
species_fixes = {
    'bottlenose_dolpin' : 'bottlenose_dolphin',
    'kiler_whale' : 'killer_whale',
    'beluga' : 'beluga_whale',
    'globis' : 'short_finned_pilot_whale',
    'pilot_whale' : 'short_finned_pilot_whale'
}
# Assign the result back to the column. Calling replace(..., inplace=True) on
# trainedData['species'] is a chained in-place operation: it emits a
# FutureWarning on recent pandas and leaves the DataFrame unchanged once
# Copy-on-Write is enabled.
trainedData['species'] = trainedData['species'].replace(species_fixes)

print('\nAfter Removing duplicate labels : ')
print("Total unique species : ", trainedData['species'].nunique())


# Coarse class = last underscore-separated token of the species name
# (e.g. 'killer_whale' -> 'whale').
trainedData['class'] = trainedData['species'].str.rsplit('_', n=1).str[-1]
trainedData.head()

# Encoding 

In [None]:
# Give every distinct individual_id an integer index, in order of first appearance.
unique_classes = pd.unique(df['individual_id'])
encoding = {individual: index for index, individual in enumerate(unique_classes)}
# Map only the individual_id column. DataFrame.replace(encoding) would apply the
# id lookup to every column of the frame, which is needlessly slow and
# could rewrite a matching value in an unrelated column. Series.map also yields
# an integer column, which the label tensor built later requires.
# assign() returns a new frame, so this is safe even though `df` is a filtered
# selection of the original frame.
df = df.assign(individual_id=df['individual_id'].map(encoding))
df.head()

**If you like it, please support me by upvoting!**

# Modeling & Conclusion

In [None]:
# Work with the first 600 images as a small subset.
test = df['Image_path'][:600]


def _load_and_preprocess(image_path):
    """Open one image, run it through `preprocess`, and release the file and pixel data."""
    with Image.open(image_path) as whale_img:
        return preprocess(whale_img)


# Preprocess image by image instead of first collecting 600 opened PIL images
# in a list: that list kept every decoded full-resolution image alive, while
# only the small preprocessed tensors are needed.
imgs_tensor = torch.stack([_load_and_preprocess(whale) for whale in test])

In [None]:
# Integer class labels for the same 600 rows used for the image tensors.
labels = torch.tensor(df['individual_id'][:600].values)
# Number of output units needed: the largest label index plus one.
max_label = int(labels.max()) + 1
max_label

**Building Model**

In [None]:
# The network consumes flattened image tensors, so the width of the first layer
# is read from an actual preprocessed image. The previous hard-coded 128*128
# only fits single-channel images, whereas `preprocess` emits 3-channel
# 128x128 tensors (3*128*128 values each), which made the first Linear layer
# reject the flattened batches.
n_inputs = imgs_tensor[0].numel()

# Two hidden layers with sigmoid activations, followed by one output unit per
# label index; LogSoftmax produces the log-probabilities that NLLLoss expects.
model = nn.Sequential(nn.Linear(n_inputs, 256),
                      nn.Sigmoid(),
                      nn.Linear(256, 128),
                      nn.Sigmoid(),
                      nn.Linear(128, max_label),
                      nn.LogSoftmax(dim=1))

optimizer = optim.SGD(model.parameters(), lr=0.01)
criterion = nn.NLLLoss()

model

**Finding Loss**

In [None]:
#epochs = 5
#batch_size = 125
#iters = int(len(imgs_tensor)/batch_size)
#next_batch = 0
#for e in range(epochs):
 #   running_loss = 0
 #   next_batch = 0
  #  for n in range(iters):
    #    batch_images = imgs_tensor[next_batch:next_batch+batch_size] 
     #   batch_images = batch_images.view(batch_images.shape[0], -1)
       # batch_labels = labels[next_batch:next_batch+batch_size]       
       # optimizer.zero_grad()       
       # output = model(batch_images)
        #loss = criterion(output, batch_labels)   
        #loss.backward()
        #optimizer.step()      
        #running_loss += loss.item()       
        #next_batch += batch_size
        
    #print(running_loss)
    

**Under construction! If you like it, please upvote this notebook.**

# Continue..

# This notebook will be updated further — stay with me and support me by upvoting!

References:
1. https://www.kaggle.com/jhonatansilva31415/whales-a-simple-guide/notebook