## Import Libraries

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2

import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torchvision.models import vgg16
from torch.utils.data import Dataset, DataLoader

from tqdm import tqdm

## Load data

In [None]:
# Read the training metadata CSV into a DataFrame and preview its first rows.
train_csv_path = '../input/happy-whale-and-dolphin/train.csv'
train_df = pd.read_csv(train_csv_path)
train_df.head()

In [None]:
# Summary statistics of the training metadata.
train_df.describe()

In [None]:
# Column dtypes of the training metadata.
train_df.dtypes

In [None]:
# Collect the distinct values of the `species` column and display them.
unique_species = train_df.species.unique()
unique_species

In [None]:
# Plot how many training rows belong to each species.
# The Series is passed as `x=` explicitly: newer seaborn releases interpret the
# first positional argument as `data`, so a positional Series is no longer
# treated as the variable to count.
sns.countplot(x=train_df.species)
# Species names are long; rotate the tick labels so they do not overlap.
plt.xticks(rotation=90)

## Custom Dataset

In [None]:
class CustomDataset(Dataset):
    """Dataset that reads images from disk and pairs them with class indices.

    Each row of ``df`` describes one sample: column 0 holds the image file
    name (joined onto ``root_dir``) and column 2 holds the label that is
    looked up in ``label_to_id``.

    Args:
        root_dir: Directory that contains the image files.
        df: DataFrame with one row per sample (file name in column 0,
            label in column 2).
        label_to_id: Mapping from label value to integer class index.
        transform: Callable applied to the RGB image array; its result is
            returned as the sample's image.
    """

    def __init__(self, root_dir, df, label_to_id, transform):
        self.root_dir = root_dir
        self.df = df
        self.label_to_id = label_to_id
        self.transform = transform

    def __len__(self):
        """Return the number of samples, i.e. the number of rows in ``df``."""
        return len(self.df)

    def __getitem__(self, index):
        """Load and transform the sample at positional row ``index``.

        Returns:
            A ``(image, target)`` tuple where ``image`` is the output of
            ``transform`` and ``target`` is a tensor holding the class index.

        Raises:
            FileNotFoundError: If the image file does not exist.
            OSError: If the file exists but OpenCV cannot decode it.
            KeyError: If the row's label is missing from ``label_to_id``.
        """
        image_path = os.path.join(self.root_dir, self.df.iloc[index, 0])

        # cv2.imread does not raise on failure; it returns None, which would
        # otherwise surface as an unrelated-looking error inside cvtColor.
        if not os.path.isfile(image_path):
            raise FileNotFoundError(f'Image file not found: {image_path}')
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            raise OSError(f'Could not decode image: {image_path}')

        # OpenCV loads images as BGR; convert to RGB before transforming.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        label = self.df.iloc[index, 2]
        target = self.label_to_id[label]

        image = self.transform(image)
        return image, torch.tensor(target)

In [None]:
# Training-time preprocessing: array -> PIL image, squash to 56x56, random
# horizontal flip, convert to tensor, then normalize each channel with
# mean 0.5 and std 0.5.
train_transforms = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize((56, 56)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

In [None]:
# Distinct individual ids, in order of first appearance; these become the
# classification labels.
unique_individual_ids = train_df.individual_id.unique()
unique_individual_ids

In [None]:
# Build both directions of the label <-> class-index mapping. Indices follow
# the order of `unique_individual_ids`, starting at 0.
label_to_id = {label: idx for idx, label in enumerate(unique_individual_ids)}
id_to_label = dict(enumerate(unique_individual_ids))

In [None]:
# Wrap the training images in a dataset and serve them in shuffled
# mini-batches of 8.
root_dir = '../input/happy-whale-and-dolphin/train_images'

dataset = CustomDataset(
    root_dir=root_dir,
    df=train_df,
    label_to_id=label_to_id,
    transform=train_transforms,
)

train_loader = DataLoader(dataset=dataset, batch_size=8, shuffle=True)

## Load pretrained vgg16

In [None]:
# Instantiate VGG16 with torchvision's pretrained weights.
# NOTE(review): the classifier is replaced and a checkpoint state dict is
# loaded into the model further down, so the pretrained download here looks
# redundant — confirm before changing.
model = vgg16(pretrained=True)

In [None]:
# Load the previous run's checkpoint. `map_location='cpu'` keeps the load
# independent of the device the checkpoint was saved from, so this also works
# on machines without CUDA; the model is moved to the training device later.
last_checkpoint = torch.load(
    '../input/happywhale-pytorch-vgg16/last_checkpoint.pth.tar',
    map_location='cpu',
)

In [None]:
# Inspect which entries the checkpoint contains.
last_checkpoint.keys()

In [None]:
# Replace the stock VGG16 classifier with a smaller head whose output size
# equals the number of known labels.
model.classifier = nn.Sequential(
    nn.Linear(in_features=25088, out_features=4096),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(in_features=4096, out_features=len(label_to_id)),
)

In [None]:
# Freeze every parameter outside the classifier head so that only the head
# receives gradient updates.
for name, param in model.named_parameters():
    if 'classifier' in name:
        continue
    param.requires_grad = False

In [None]:
# Restore the model weights stored in the checkpoint. This runs after the
# classifier head was replaced above — presumably so the layer shapes match
# the ones that were saved; verify against the run that wrote the checkpoint.
model.load_state_dict(last_checkpoint['model_state_dict'])

In [None]:
# Pull a single batch from the loader and display the tensor shapes.
images, targets = next(iter(train_loader))
images.shape, targets.shape

In [None]:
# Sanity-check forward pass on the CPU (the model is only moved to `device`
# in the training cell) and display the output shape.
output = model(images.cpu())
output.shape

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Train model
# Resumes from the epoch stored in the checkpoint and runs four more epochs.

model.to(device)
start_epoch = last_checkpoint['epoch']
EPOCHS = start_epoch + 4

criterion = nn.CrossEntropyLoss()
# All parameters (including frozen ones) are handed to Adam — presumably so
# the parameter groups line up with the saved optimizer state; confirm before
# filtering on requires_grad.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
optimizer.load_state_dict(last_checkpoint['optimizer_state_dict'])

last_train_loss = 0

for epoch in range(start_epoch, EPOCHS):
    print(f'Epoch: {epoch+1}/{EPOCHS}')

    # Set training mode explicitly so dropout is active even if the model was
    # switched to eval mode before this cell was (re-)run.
    model.train()

    correct = 0
    total = 0
    losses = []

    for batch_idx, data in enumerate(tqdm(train_loader)):
        images, targets = data
        images = images.to(device)
        targets = targets.to(device)

        output = model(images)  # (batch_size, num_classes)

        loss = criterion(output, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Predicted class = index of the largest logit per sample.
        _, pred = torch.max(output, 1)
        correct += (pred == targets).sum().item()
        total += pred.size(0)

        losses.append(loss.item())

    # Epoch metrics: mean of the per-batch losses and overall accuracy.
    train_loss = np.mean(losses)
    train_acc = correct / total

    last_train_loss = train_loss
    print(f'Train Loss: {train_loss}\tTrain Acc: {train_acc}')

In [None]:
# Persist the training state so a later run can resume from it.
# 'epoch' stores EPOCHS, which is the value the training cell reads back as
# its starting epoch. The loss is converted to a plain Python float so the
# checkpoint holds no numpy scalar objects, which restricted
# (`weights_only`) loading rejects.
torch.save({
    'epoch': EPOCHS,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': float(last_train_loss)
}, 'last_checkpoint.pth.tar')

## Make predictions

In [None]:
# Load the sample submission; its first column is used below as the test image
# file name and its second column is overwritten with the predictions.
sample_df = pd.read_csv('../input/happy-whale-and-dolphin/sample_submission.csv')
sample_df.head()

In [None]:
# Inference-time preprocessing. It mirrors the training pipeline without the
# random flip. Resize takes an explicit (56, 56) tuple: a bare int would only
# resize the shorter edge and keep the aspect ratio, giving the model inputs
# shaped differently from the 56x56 images used in training.
test_transforms = transforms.Compose([transforms.ToPILImage(),
                                     transforms.Resize((56, 56)),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.5,0.5,0.5],
                                                          [0.5,0.5,0.5])])

In [None]:
# Predict the five most likely individual ids for every test image and write
# them, space-separated, into the second column of `sample_df`.
test_img_dir = '../input/happy-whale-and-dolphin/test_images'

res = []

# Run inference in eval mode (dropout disabled) and without autograd
# bookkeeping; the previous train/eval mode is restored afterwards.
was_training = model.training
model.eval()
try:
    with torch.no_grad():
        for i in tqdm(range(sample_df.shape[0])):
            image_path = os.path.join(test_img_dir, sample_df.iloc[i, 0])
            image = cv2.imread(image_path, cv2.IMREAD_COLOR)
            # cv2.imread returns None instead of raising when it cannot read.
            if image is None:
                raise FileNotFoundError(f'Could not read test image: {image_path}')
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Add a leading batch dimension of size 1.
            image = test_transforms(image).unsqueeze(0)

            output = model(image.to(device))
            _, tk = torch.topk(output, 5, dim=1)
            pred = ' '.join(id_to_label[class_idx] for class_idx in tk[0].tolist())
            res.append(pred)
finally:
    model.train(was_training)

# Assign all predictions at once instead of one cell at a time.
sample_df[sample_df.columns[1]] = res

In [None]:
# Write the predictions to the submission file, without the index column.
sample_df.to_csv('submission.csv', index=False)

In [None]:
# Marker that the notebook ran to completion.
print('Done!')