# Happywhale Whale and Dolphin Identification Using Wide ResNet50 Transfer Learning

# Imports and installation

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os # for os stuff
from sklearn import preprocessing  # for preprocessing
from tqdm import tqdm # for progress bar

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# pytorch imports
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
from torchvision.io import read_image

# for visualizing
import matplotlib.pyplot as plt
from PIL import Image

# Run on the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

# Show every name exposed by torchvision.models.
print(dir(models))

In [None]:
# Ask PyTorch to release cached GPU memory that it is not currently using
# (presumably to reclaim memory left over from earlier cell runs).
torch.cuda.empty_cache()

# Transformers
For the training set, each image is resized so that its shorter side is 256 pixels, a random 224x224 crop is taken, and then a random horizontal flip and normalization are applied.

For the test set, the pipeline is the same as for the training set, except that the crop is centered and there is no flip.

In [None]:
# Training pipeline: convert to float, resize, take a random 224 crop,
# randomly flip horizontally, then normalize each channel.
# NOTE(review): the mean/std values look like the usual ImageNet channel
# statistics expected by torchvision's pretrained models -- confirm.
train_transformer = nn.Sequential(
    transforms.ConvertImageDtype(dtype=torch.float),
    transforms.Resize(size=256),
    transforms.RandomCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
)

# Evaluation pipeline: same conversion, resize and normalization, but with a
# deterministic center crop and no flipping.
test_transformer = nn.Sequential(
    transforms.ConvertImageDtype(dtype=torch.float),
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
)

# Preparing the data


Load the metadata into a dataframe and merge misspelled species labels into their correctly spelled counterparts. A `path` column is added so the images are easier to locate.

In [None]:
# Load the training metadata.
train_dataframe = pd.read_csv("../input/happy-whale-and-dolphin/train.csv")

# Merge misspelled species names into their correctly spelled labels.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on the column selection: that chained in-place
# form acts on a temporary Series under pandas Copy-on-Write and would leave
# the dataframe unchanged.
train_dataframe['species'] = train_dataframe['species'].replace({
    'bottlenose_dolpin': 'bottlenose_dolphin',
    'kiler_whale': 'killer_whale',
})
# Full path of every training image, built from its file name.
train_dataframe['path'] = '../input/happy-whale-and-dolphin/train_images/' + train_dataframe['image']

# The test set has no metadata file, so build its dataframe from the
# directory listing of the test image folder.
test_images = os.listdir('../input/happy-whale-and-dolphin/test_images')
test_dataframe = pd.DataFrame(data=test_images, columns=['image'])
test_dataframe['path'] = '../input/happy-whale-and-dolphin/test_images/' + test_dataframe['image']

# Number of distinct individuals; used later as the size of the output layer.
num_individuals = train_dataframe['individual_id'].nunique()
print("num individuals:", num_individuals)
print("num species:", train_dataframe['species'].nunique())
print(train_dataframe.species.value_counts())
print(train_dataframe)
print(test_dataframe)

Here we also convert the individual ids to integers so we can work with tensors for the labels.

In [None]:
# Fit an encoder on the individual ids and overwrite the column with the
# resulting integer codes. The fitted encoder is kept so predictions can be
# mapped back to the original ids with inverse_transform.
label_encoder = preprocessing.LabelEncoder()
train_dataframe['individual_id'] = label_encoder.fit_transform(
    train_dataframe['individual_id']
)
print(train_dataframe)

Dataset class.
The image preprocessing is done on the GPU.

In [None]:
class HappyDataSet(torch.utils.data.Dataset):
    """Dataset serving the images listed in a dataframe.

    Each item is an ``(image, label)`` pair. The image is read from the row's
    ``path`` as RGB, moved to the module-level ``device`` and then passed
    through ``transformer`` (when one is given).

    Args:
        dataframe: DataFrame with a ``path`` column, plus an ``image`` column
            (used when ``test`` is true) or an ``individual_id`` column
            (used otherwise).
        transformer: Callable applied to the image tensor, or ``None`` to
            return the image untransformed.
        test: When true the label is the image file name; otherwise it is the
            row's ``individual_id``.
    """

    def __init__(self, dataframe, transformer, test=False):
        self.dataframe = dataframe
        self.transformer = transformer
        self.test = test

    def __len__(self):
        """Return the number of rows (images) in the dataframe."""
        return self.dataframe.shape[0]

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at position ``idx``."""
        # Samplers hand out positions in range(len(self)), so look rows up by
        # position (.iloc). Label-based .loc only happens to work when the
        # dataframe has a default 0..n-1 index and breaks for a filtered or
        # split dataframe.
        path = self.dataframe['path'].iloc[idx]
        # ImageReadMode.RGB makes every image come back as RGB.
        image = read_image(path, torchvision.io.ImageReadMode.RGB)
        # Move to the target device first so the transforms run there.
        image = image.to(device)
        if self.transformer is not None:
            image = self.transformer(image)
        label_column = 'image' if self.test else 'individual_id'
        label = self.dataframe[label_column].iloc[idx]
        return image, label
        

This is an example of one of the grayscale images that need to be converted to RGB.

In [None]:
# Open the training image at row label 38 with PIL and convert it to RGB.
# As the last expression in the cell, the resulting image is what the
# notebook displays.
Image.open(train_dataframe.loc[38, "path"]).convert('RGB')

# Make the dataloaders

In [None]:
# Pair each dataframe with its matching transform pipeline.
train_set = HappyDataSet(
    dataframe=train_dataframe,
    transformer=train_transformer,
    test=False,
)
test_set = HappyDataSet(
    dataframe=test_dataframe,
    transformer=test_transformer,
    test=True,
)

# Training batches are shuffled; test images are served one at a time in
# dataframe order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=64,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_set,
    batch_size=1,
    shuffle=False,
)

In [None]:
# # Check if dataloader breaks
# dataiter = iter(train_loader)
# images, labels = next(dataiter)

# Training Function

In [None]:
def _set_learning_rate(optimizer, learning_rate):
    """Set ``learning_rate`` on every parameter group of ``optimizer``."""
    for group in optimizer.param_groups:
        group['lr'] = learning_rate


def train(model, optimizer, criterion, train_loader, start_epoch=0, epochs=30, lr_schedule=None, state=None, checkpoint_schedule=None):
    """Train ``model`` and optionally resume from / write checkpoints.

    Args:
        model: Network to train; it is moved to the module-level ``device``
            and put in training mode.
        optimizer: Optimizer that updates the model's parameters.
        criterion: Loss called as ``criterion(output, labels)``.
        train_loader: Iterable of ``(images, labels)`` batches that also
            supports ``len()``.
        start_epoch: Epoch number of the first epoch to run. Overridden by
            ``state['epoch']`` when ``state`` is given.
        epochs: Number of epochs to run in this call.
        lr_schedule: Mapping ``epoch -> learning rate``; the rate is applied
            at the start of that epoch. ``None`` means no scheduled changes.
        state: Optional checkpoint dict with the keys ``'model'``,
            ``'optimizer'`` and ``'epoch'``, as written by this function.
        checkpoint_schedule: Collection of epoch numbers after which a
            checkpoint is written to ``./checkpoint_{epoch+1}.pkl``.
            ``None`` means no checkpoints.
    """
    # None defaults avoid sharing one mutable dict/list across calls.
    lr_schedule = {} if lr_schedule is None else lr_schedule
    checkpoint_schedule = () if checkpoint_schedule is None else checkpoint_schedule

    model.to(device)
    model.train()

    if state:
        # Resume from the provided checkpoint.
        model.load_state_dict(state['model'])
        optimizer.load_state_dict(state['optimizer'])
        start_epoch = state['epoch']

    # Replay the schedule for the epochs that are being skipped so that the
    # most recent scheduled learning rate before start_epoch is in effect.
    for epoch in range(start_epoch):
        if epoch in lr_schedule:
            _set_learning_rate(optimizer, lr_schedule[epoch])

    num_batches = len(train_loader)
    # Continue numbering from start_epoch. Restarting at 0 after a resume
    # would re-apply early learning rates and overwrite earlier checkpoint
    # files.
    for epoch in range(start_epoch, start_epoch + epochs):
        if epoch in lr_schedule:
            # Update learning rate at scheduled epoch
            _set_learning_rate(optimizer, lr_schedule[epoch])

        epoch_loss = 0.
        for batch in tqdm(train_loader):
            images, labels = batch[0].to(device), batch[1].to(device)

            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels)
            epoch_loss += loss.item()
            loss.backward()
            optimizer.step()

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        print(f'loss: {epoch_loss / max(num_batches, 1)}, e={epoch}')
        if epoch in checkpoint_schedule:
            # Store epoch + 1: the epoch at which a resumed run should start.
            state = {'epoch': epoch+1, 'model': model.state_dict(), 'optimizer': optimizer.state_dict()}
            torch.save(state, f'./checkpoint_{epoch+1}.pkl')

# Get the model and train it

This is using Adam as the optimizer, with cross entropy loss.

In [None]:
# resnet = models.wide_resnet50_2(pretrained=True)
# # change the final layer to num_individuals predictions
# resnet.fc = nn.Linear(2048, num_individuals)

# criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(resnet.parameters())

# train(resnet, optimizer, criterion, train_loader, epochs=2, state=torch.load('../input/happywhale-checkpoints/checkpoint_1.pkl'), lr_schedule={0:.1, 1:.01, 2:.001}, checkpoint_schedule=[0,1,2])

# Make predictions

In [None]:
# evaluate from a checkpoint
resnet = models.wide_resnet50_2()
# Replace the final layer so it outputs one score per individual; reading
# in_features from the existing layer avoids hard-coding its width.
resnet.fc = nn.Linear(resnet.fc.in_features, num_individuals)
# map_location="cpu" lets a checkpoint saved from a GPU be loaded on a
# machine without one; the model is moved to `device` before it is used.
# NOTE(review): torch.load unpickles the file -- only load checkpoints that
# come from a trusted source.
state = torch.load('../input/happywhale-checkpoints/checkpoint_1.pkl', map_location="cpu")
resnet.load_state_dict(state['model'])

In [None]:
# For testing out the prediction
resnet.to(device)
resnet.eval()
with torch.no_grad():
    dataiter = iter(test_loader)
    # Use the builtin next(): DataLoader iterators in current PyTorch
    # releases no longer provide a .next() method.
    image, label = next(dataiter)
    # The batch's labels are unpacked into the single file name it holds.
    label, = label
    print(label)
    image = image.to(device)
    output = resnet(image)
    # Flatten the model output into a 1-D score vector before the softmax.
    flattened_output = torch.flatten(output)
    probs = F.softmax(flattened_output, dim=0)
    top_5_values, top_5_predicted = torch.topk(probs, k=5)
    print(top_5_values, top_5_predicted)
    # Map the predicted class indices back to the original individual ids.
    top_5_ids = label_encoder.inverse_transform(top_5_predicted.detach().cpu().numpy())
    print(top_5_values, top_5_ids)

In [None]:
# outfile = open('submission.csv', 'w')
# outfile.write('image,predictions\n')
# resnet.to(device)
# resnet.eval()
# new_individual_threshold = 100/num_individuals

# with torch.no_grad():
#     for image, label in tqdm(test_loader):
#         image = image.to(device)
#         label, = label
#         line = f'{label},' # the line we will write to file
#         output = resnet(image)
#         flattened_output = torch.flatten(output)
#         output_probs = F.softmax(flattened_output, dim=0)
        
#         top_5_values, top_5_predicted = torch.topk(output_probs, k=5)
#         top_5_ids = label_encoder.inverse_transform(top_5_predicted.detach().cpu().numpy())
        
#         # put in the top 4 scorers first
#         for i in range(4):
#             line += top_5_ids[i] + " "
#         if torch.min(top_5_values) < new_individual_threshold:
#             # predict new individual
#             line += "new_individual"
#         else:
#             # predict the last top 5 scorer
#             line += top_5_ids[4]
#         line += "\n"
#         outfile.write(line)
        
# outfile.close()

The code for generating a random submission

In [None]:
# # Random guessing
# outfile = open('submission.csv', 'w')
# outfile.write('image,predictions\n')

# with torch.no_grad():
#     for image, label in tqdm(test_loader):
#         image = image.to(device)
#         label, = label
#         line = f'{label},' # the line we will write to file
#         top_5_predicted = torch.randperm(num_individuals+1)
#         if num_individuals in top_5_predicted:
#             top_5_predicted = top_5_predicted[top_5_predicted != num_individuals]
#         top_5_ids = label_encoder.inverse_transform(top_5_predicted.detach().cpu().numpy())
        
#         # put in the top 4 scorers first
#         for i in range(4):
#             line += top_5_ids[i] + " "
#         if len(top_5_ids) == 4:
#             # predict new individual
#             line += "new_individual"
#         else:
#             # predict the last top 5 scorer
#             line += top_5_ids[4]
#         line += "\n"
#         outfile.write(line)
        
# outfile.close()