In [None]:
# Install a package directly from the Cadene/pretrained-models.pytorch GitHub
# repository. No branch, tag or commit is given in the URL, so whatever the
# repository's default branch currently contains is installed (unpinned).
# Presumably this provides the `pretrainedmodels` module imported further down.
!pip install git+https://github.com/Cadene/pretrained-models.pytorch

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import cv2
import torch
import torch.nn as nn 
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import transforms, Compose
import torchvision.models as models
import pretrainedmodels
from sklearn.model_selection import train_test_split
from PIL import Image
from tqdm import tqdm_notebook, tqdm
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
# Root of the input data and the two image folders beneath it.
PATH = '../input/'
TRAIN, TEST = (os.path.join(PATH, split) for split in ('train', 'test'))

# Training annotations; the 'Id' column holds the class label of each image.
train_df = pd.read_csv(os.path.join(PATH, 'train.csv'))
# Optional experiment: keep only the known identities.
# train_df = train_df[train_df['Id'] != 'new_whale']

# Label vocabulary: `catlist` maps class index -> label, `NCATS` is its size.
catlist = train_df['Id'].unique()
NCATS = train_df['Id'].nunique()

# Report how many image files each folder contains.
print("Number of train images: {}".format(len(os.listdir(TRAIN))))
print("Number of test images: {}".format(len(os.listdir(TEST))))

In [None]:
# Helpers

def label2sparse(label):
    """Encode `label` as a one-hot float tensor of length NCATS.

    The hot position is the index of the first entry of the module-level
    `catlist` that equals `label`. An IndexError is raised when `label` does
    not occur in `catlist`.
    """
    position = np.flatnonzero(catlist == label)[0]
    one_hot = np.zeros((NCATS, 1))
    one_hot[position] = 1
    return torch.Tensor(one_hot.squeeze())

def label2ix(label):
    """Return the position(s) of `label` in the module-level `catlist`.

    The matching indices are squeezed, so a label that occurs exactly once
    yields a 0-d integer array.
    """
    matches = np.flatnonzero(catlist == label)
    return matches.squeeze()

def ix2label(ix):
    """Look up the label stored at index `ix` of the module-level `catlist`."""
    label = catlist[ix]
    return label

def sparse2label(sparse):
    """Decode a score / one-hot vector back into a label.

    `sparse` is squeezed, the argmax over its last axis is taken, and the
    result is used to index the module-level `catlist`.
    """
    flattened = sparse.squeeze()
    best_ix = np.argmax(flattened, axis=-1)
    return catlist[best_ix]

In [None]:
# Show `pretrainedmodels.model_names` as the cell output; presumably these are
# the architecture identifiers that can be chosen as `model_name`.
pretrainedmodels.model_names

In [None]:
# Backbone architecture to fine-tune.
model_name = 'se_resnext50_32x4d'
# Look up the settings entry for this architecture (the value is not used;
# the lookup raises KeyError if the name is unknown to the library).
pretrainedmodels.pretrained_settings[model_name]
# Resolve the constructor by name on the package and build the network with
# its original 1000-way output.
build_backbone = getattr(pretrainedmodels, model_name)
model = build_backbone(num_classes=1000)

In [None]:
# Swap the classification head for one with NCATS outputs.
# The input width is read from the head being replaced instead of being
# hard-coded, so the cell keeps working when `model_name` is changed to a
# backbone with a different feature size. Re-running the cell is safe because
# the new head has the same `in_features`.
# NOTE(review): this assumes the existing head is an nn.Linear; backbones
# whose `last_linear` is a convolution need a different replacement.
head_in_features = model.last_linear.in_features
model.last_linear = nn.Linear(in_features=head_in_features, out_features=NCATS, bias=True)

In [None]:
# Training pipeline: array -> PIL image, random horizontal flip, resize to
# 224x224, random rotation of up to 15 degrees, tensor conversion, then
# per-channel normalisation (presumably the ImageNet statistics expected by
# the pretrained backbone).
train_transform = Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.Resize(size=(224, 224)),
    transforms.RandomAffine(degrees=15),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Evaluation pipeline: same resize and normalisation, no random augmentation.
test_transform = Compose([
    transforms.ToPILImage(),
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

In [None]:
def accuracy(output, target, topk=(5,)):
    """Compute top-k accuracy (in percent) for each k in `topk`.

    Parameters
    ----------
    output (torch.Tensor): Class scores, shape (N, C).
    target (torch.Tensor): Integer class indices, shape (N,).
    topk (tuple of int): The k values to evaluate.

    Returns
    -------
    list of torch.Tensor: One tensor of shape (1,) per entry of `topk`,
        holding the percentage of samples whose target is among the k
        highest-scoring classes.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk best classes per sample, transposed to (maxk, N)
        # so that row r holds every sample's rank-(r+1) prediction.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.reshape(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # `correct` inherits the transposed (non-contiguous) layout, so
            # `.view(-1)` raises on current PyTorch; `reshape` copies if needed.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
    
def mapk(output, target, k=5):
    """
    Computes the mean average precision at k.

    Each sample has exactly one correct class, so its average precision is
    1 / rank when the target appears at position `rank` (1-based) among the
    k highest-scoring classes, and 0 when it is not among them.

    Parameters
    ----------
    output (torch.Tensor): A Tensor of predicted elements.
                           Shape: (N,C)  where C = number of classes, N = batch size
    target (torch.int): A Tensor of elements that are to be predicted.
                        Shape: (N) where each value is  0≤targets[i]≤C−1
    k (int, optional): The maximum number of predicted elements.
                       Capped at C when fewer than k classes exist.

    Returns
    -------
    score (torch.float):  The mean average precision at k over the output,
                          as a tensor of shape (1,)
    """
    with torch.no_grad():
        batch_size = target.size(0)
        # topk cannot return more entries than there are classes.
        k = min(k, output.size(1))

        _, pred = output.topk(k, 1, True, True)          # (N, k) class indices
        hits = pred.eq(target.reshape(-1, 1))            # (N, k), at most one True per row

        # Precision contributed by a hit at rank r is 1/r. The weighting is
        # applied to a float copy: writing scaled values back into the boolean
        # `hits` tensor would collapse every weight to True/False.
        rank_weights = 1.0 / torch.arange(1, k + 1, dtype=torch.float, device=output.device)
        score = (hits.float() * rank_weights).sum().reshape(1)
        score.mul_(1.0 / batch_size)
        return score

In [None]:
# Parallel arrays taken from the annotation frame: the 'Image' column and the
# 'Id' column, row for row.
imgs, labels = train_df['Image'].values, train_df['Id'].values

In [None]:
# Hold out 5% of the annotated images for validation.
# A fixed `random_state` makes the split identical on every run, so validation
# scores and saved checkpoints stay comparable across kernel restarts.
X_train, X_val, y_train, y_val = train_test_split(imgs, labels, test_size=0.05, random_state=42)

In [None]:
class WhaleDataset(Dataset):
    """Dataset that reads images from disk and optionally pairs them with labels.

    Parameters
    ----------
    img_list : sequence of image file names, relative to `img_dir`.
    label_list : sequence of labels aligned with `img_list`; required when
        `train` is True.
    transforms : optional callable applied to the loaded RGB image array.
    train : when True, `__getitem__` returns `(image, label_index)`;
        otherwise only the image.
    img_dir : directory holding the images. Defaults to the module-level
        `TRAIN` folder, so existing calls behave as before; pass another
        folder (e.g. the test folder) to read images from elsewhere.
    """

    def __init__(self, img_list, label_list=None, transforms=None, train=True, img_dir=None):
        super(WhaleDataset, self).__init__()
        if train and label_list is None:
            raise ValueError("label_list is required when train=True")
        self.img_list = img_list
        self.label_list = label_list
        self.transforms = transforms
        self.train = train
        self.img_dir = TRAIN if img_dir is None else img_dir

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_list[idx])
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of deep inside a transform.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV decodes to BGR channel order; convert to RGB so the channels
        # line up with the per-channel normalisation and pretrained weights.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if self.transforms:
            img = self.transforms(img)
        if self.train:
            y = label2ix(self.label_list[idx])
            return img, y
        return img

    def __len__(self):
        return len(self.img_list)

In [None]:
# Training split uses the augmenting pipeline.
train_data = WhaleDataset(
    img_list=X_train,
    label_list=y_train,
    transforms=train_transform,
    train=True,
)
# Validation split is labelled too (train=True) but uses the non-augmenting
# pipeline.
valid_data = WhaleDataset(
    img_list=X_val,
    label_list=y_val,
    transforms=test_transform,
    train=True,
)

In [None]:
# Batches of 32; only the training data is reshuffled on each pass.
train_loader = DataLoader(dataset=train_data, batch_size=32, shuffle=True)
valid_loader = DataLoader(dataset=valid_data, batch_size=32, shuffle=False)

In [None]:
# Pull a single batch from the training loader for inspection.
check = next(iter(train_loader))
check_ims, check_labels = check[0], check[1]

In [None]:
# Smoke test: run the sample batch through the network on the CPU.
# The pass is done in eval mode under no_grad so that it neither updates the
# BatchNorm running statistics with this one batch nor builds an autograd
# graph that is never used. The previous train/eval mode is restored after.
model.cpu()
was_training = model.training
model.eval()
with torch.no_grad():
    t = model(check_ims)
model.train(was_training)

In [None]:
# Preview up to five images of the sample batch with their labels.
n_show = min(5, len(check_ims))
# squeeze=False always yields a 2-D axes array, so a batch holding a single
# image no longer breaks the indexing below.
_, axs = plt.subplots(1, n_show, figsize=(20, 5), squeeze=False)

# The batch was normalised per channel by the transform pipeline; undo that
# (x * std + mean) and clamp to [0, 1] so imshow renders real colours instead
# of clipping out-of-range values.
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
for i in range(n_show):
    img = (check_ims[i] * norm_std + norm_mean).clamp(0, 1)
    axs[0, i].imshow(img.permute(1, 2, 0).numpy())   # CHW -> HWC for matplotlib
    axs[0, i].set_title(ix2label(check_labels[i].item()))

In [None]:
# Adam over every model parameter, with cross-entropy on class indices.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.002)

In [None]:
%%timeit

epoch = 1
model.cuda()
model.train()
best_map5 = 0.
pbar = tqdm_notebook(train_loader)
for ep in range(epoch):
    tr_loss, tr_score = 0., 0.
    for batch_idx, (x, y) in enumerate(pbar):
        pbar.set_postfix({"Train loss": tr_loss / (batch_idx + 1), "Train Map@5": tr_score / (batch_idx + 1)})
        x, y = x.cuda(), y.cuda()
        outputs = model(x)
        loss = criterion(outputs, y)
        tr_loss += loss.item()
        tr_score += mapk(outputs, y).item()
        loss.backward()
        optimizer.step()
        if not (batch_idx + 1) % 50:
#             print(f"Train loss: {tr_loss / (batch_idx + 1)}")
            print(f"Train loss {tr_loss / (batch_idx + 1)} Train Map@5 {tr_score / (batch_idx + 1)}")
        
        if not (batch_idx + 1) % 100:
            model.eval()
            val_loss, val_score = 0., 0.
            for val_idx, (x, y) in enumerate(valid_loader):
                x, y = x.cuda(), y.cuda()
                outputs = model(x)
                loss = criterion(outputs, y)
                val_loss += loss.item()
                val_score += mapk(outputs, y).item()
            full_val_loss = val_loss / len(valid_loader)
            full_val_score = val_score / len(valid_loader)
            print(f"Validation loss {full_val_loss:.5f}, Validation Map@5 {full_val_score:.5f}")
            if full_val_score > best_map5:
                print(f'Validation Map@5 increased from {best_map5:.5f} to {full_val_score:.5f}')
                best_map5 = full_val_score
                torch.save({'epoch': epoch,
                           'model_state_dict': model.state_dict(),
                           'optimizer_state_dict': optimizer.state_dict(),
                           'loss': full_val_loss,
                           'map5': full_val_score}, f'{model_name}_run1.pth')

            model.train()

In [None]:
# Completion marker: printed once execution reaches the last cell.
print("We are done")