# Identifying Whale and Dolphin Individuals

### Import Libraries

In [1]:
from fastai.vision.all import *
import pandas as pd
from pathlib import Path
from sklearn.model_selection import StratifiedKFold
import os

### Data Import

In [4]:
# Root folder of the competition data; later cells read everything relative to it.
path = Path('../input/happy-whale-and-dolphin')

# Training metadata table.
train_df = pd.read_csv(path.joinpath('train.csv'))

# Preview the first rows.
train_df.head(n=5)

In [5]:
# Number of distinct values in each column.
train_df.nunique(axis=0)

In [None]:
def sample_img(ds, group, n_images=6):
    """
    Display sample training data: one image per species for the given group.

    Parameters
    ----------
    ds : pandas.DataFrame
        Table with the columns 'Dolphin or Whale', 'species' and 'image'.
    group : str
        Value of the 'Dolphin or Whale' column whose species are shown.
    n_images : int, optional
        Maximum number of species (one image each) to draw. Defaults to 6,
        which yields a 2 x 3 grid.

    Raises
    ------
    TypeError
        If `group` is not a string.
    KeyError
        If `ds` lacks one of the required columns.
    ValueError
        If `n_images` is smaller than 1 or no row of `ds` belongs to `group`.
    """
    if not isinstance(group, str):
        raise TypeError("Group argument must be a string!")
    if n_images < 1:
        raise ValueError("n_images must be at least 1")

    required = ('Dolphin or Whale', 'species', 'image')
    missing = [col for col in required if col not in ds.columns]
    if missing:
        raise KeyError("Dataset is missing required column(s): {}".format(missing))

    # Filtering on an unknown group does not raise by itself; it just yields an
    # empty frame, so check explicitly before any figure is created.
    sub = ds[ds['Dolphin or Whale'] == group]
    if sub.empty:
        raise ValueError("Cannot find group in dataset!")

    # One reproducible random row per species, capped at n_images rows.
    picks = sub.groupby("species").sample(n=1, random_state=1).head(n_images)

    n_cols = 3
    n_rows = (len(picks) + n_cols - 1) // n_cols  # ceiling division
    fig = plt.figure(figsize=(30, 10 * n_rows))

    base = Path('../input/happy-whale-and-dolphin/train_images')

    for i, (species, image_name) in enumerate(zip(picks['species'], picks['image'])):
        ax = fig.add_subplot(n_rows, n_cols, i + 1)
        ax.set_title(species, fontsize=26)
        ax.imshow(plt.imread(str(base / image_name)))

    plt.suptitle('Sample {} Images'.format(group))
    fig.tight_layout()
    plt.show()

In [None]:
# Show one sample image per dolphin species.
sample_img(ds=train_df, group='Dolphin')

In [None]:
# Show one sample image per whale species.
sample_img(ds=train_df, group='Whale')

In [6]:
# Number of training rows per species.
train_df['species'].value_counts()

In [10]:
# Assign every training row to one of 5 folds, stratified on individual_id.
skf = StratifiedKFold(n_splits=5)

# Create the column up front with an integer placeholder: assigning into a
# not-yet-existing column fold by fold would first fill it with NaN and leave
# it as a float column.
train_df["kfold"] = -1
kfold_col = train_df.columns.get_loc("kfold")

for fold, (_, val_index) in enumerate(skf.split(X=train_df, y=train_df.individual_id)):
    # skf.split yields positional indices, so write with .iloc rather than the
    # label-based .loc (the two only coincide for a default RangeIndex).
    train_df.iloc[val_index, kfold_col] = fold

In [11]:
# Preview the table, now including the kfold column.
train_df.head(n=5)

In [15]:
def splitter(df, valid_fold=0):
    """
    Split rows of `df` into training and validation sets by their fold number.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a 'kfold' column.
    valid_fold : int, optional
        Fold used for validation (default 0); every other row goes to training.

    Returns
    -------
    list
        `[train, valid]`, two lists of positional row indices (0-based),
        independent of the labels in `df.index`.
    """
    # Drop the original index so the positions, not the labels, are returned.
    folds = df['kfold'].reset_index(drop=True)
    is_valid = folds == valid_fold
    train = folds.index[~is_valid].tolist()
    valid = folds.index[is_valid].tolist()
    return [train, valid]

In [16]:
# Images as input, individual_id as the (single-element) multi-label target.
datablock = DataBlock(
    blocks=(
        ImageBlock,
        # Pass the full list of ids as vocab: otherwise the vocab is built from
        # the training split only and an id that appears only in the
        # validation fold cannot be encoded.
        MultiCategoryBlock(vocab=sorted(train_df['individual_id'].unique())),
    ),
    get_x=ColReader('image', path/'train_images'),
    # MultiCategoryBlock expects a list of labels per item. Without label_delim
    # ColReader returns the raw string, which is then iterated character by
    # character. Splitting on a delimiter wraps each id in a one-element list.
    get_y=ColReader('individual_id', label_delim=' '),
    splitter=splitter,
    item_tfms=Resize(256),
)

In [17]:
# Build the training/validation DataLoaders from the metadata table.
dls = datablock.dataloaders(
    train_df,
    num_workers=2,
    shuffle=True,
)

In [18]:
# ResNet-34 learner scored with multi-label accuracy.
learn = cnn_learner(dls, arch=resnet34, metrics=accuracy_multi)

In [None]:
# Fine-tune for 5 epochs.
learn.fine_tune(epochs=5)

### Export Model

In [None]:
# Save the trained learner to disk.
learn.export(fname='happy_whale_model.pkl')

### Prediction

In [None]:
# Run the model on a single test image.
learn.predict(
    item='../input/happy-whale-and-dolphin/test_images/000110707af0ba.jpg'
)

In [None]:
test_img = '../input/happy-whale-and-dolphin/test_images/000110707af0ba.jpg'
# Deliberately not named `sample_img`: that name is the plotting helper defined
# earlier in the notebook, and rebinding it would break later calls to it.
test_pil_img = PILImage.create(test_img)
test_pil_img.to_thumb(250)

In [None]:
# Load the sample submission to see the expected output format.
submission_df = pd.read_csv(path.joinpath('sample_submission.csv'))
submission_df.head(n=5)

In [None]:
test_dir = path/'test_images'
# os.listdir returns entries in arbitrary order; sort for a reproducible table.
test_images = sorted(os.listdir(test_dir))

# Predict each test image in turn. The rows are collected first and the
# DataFrame is built once, instead of growing it row by row with .loc.
# learn.predict returns a 3-tuple; only its first element is kept.
results = pd.DataFrame(
    [(img, learn.predict(test_dir/img)[0]) for img in test_images],
    columns=['image', 'predictions'],
)

results.head()