## Importing Libraries

In [5]:
import os
import pandas as pd

import torch
import torch.nn as nn
import torchvision

from torchvision.transforms import transforms
from torchvision.io import read_image
from torch.utils.data import DataLoader, Dataset

from torch.optim import Adam
from torch.autograd import Variable


import cv2

%matplotlib inline

## Reading Data

In [6]:
root_dir = "data/"
train_dir = os.path.join(root_dir, "train_images")
train_csv = os.path.join(root_dir, "train_data.csv")
test_dir = os.path.join(root_dir, "test_images")
test_csv = os.path.join(root_dir, "test_data.csv")

In [7]:
train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)

In [8]:
print(f"Train Dataset:{len(train_df)} and Test Dataset {len(test_df)}")

Train Dataset:51033 and Test Dataset 27956


In [9]:
train_df.head()

Unnamed: 0,image,species,individual_id,path,class,width,height,split
0,00021adfb725ed.jpg,melon_headed_whale,cadddb1636b9,data/train_images/00021adfb725ed.jpg,whale,804,671,Train
1,000562241d384d.jpg,humpback_whale,1a71fbb72250,data/train_images/000562241d384d.jpg,whale,3504,2336,Train
2,0007c33415ce37.jpg,false_killer_whale,60008f293a2b,data/train_images/0007c33415ce37.jpg,whale,3599,2399,Train
3,0007d9bca26a99.jpg,bottlenose_dolphin,4b00fe572063,data/train_images/0007d9bca26a99.jpg,dolphin,3504,2336,Train
4,00087baf5cef7a.jpg,humpback_whale,8e5253662392,data/train_images/00087baf5cef7a.jpg,whale,3599,2699,Train


## Label Mapping

In [10]:
species = list(train_df.species.unique())

In [11]:
species

['melon_headed_whale',
 'humpback_whale',
 'false_killer_whale',
 'bottlenose_dolphin',
 'beluga_whale',
 'minke_whale',
 'fin_whale',
 'blue_whale',
 'gray_whale',
 'southern_right_whale',
 'common_dolphin',
 'killer_whale',
 'short_finned_pilot_whale',
 'dusky_dolphin',
 'long_finned_pilot_whale',
 'sei_whale',
 'spinner_dolphin',
 'cuviers_beaked_whale',
 'spotted_dolphin',
 'brydes_whale',
 'commersons_dolphin',
 'white_sided_dolphin',
 'rough_toothed_dolphin',
 'pantropic_spotted_dolphin',
 'pygmy_killer_whale',
 'frasiers_dolphin']

In [12]:
label_map = {}

In [13]:
for i in range(0, len(species)):
    label_map[species[i]] = i

In [14]:
label_map

{'melon_headed_whale': 0,
 'humpback_whale': 1,
 'false_killer_whale': 2,
 'bottlenose_dolphin': 3,
 'beluga_whale': 4,
 'minke_whale': 5,
 'fin_whale': 6,
 'blue_whale': 7,
 'gray_whale': 8,
 'southern_right_whale': 9,
 'common_dolphin': 10,
 'killer_whale': 11,
 'short_finned_pilot_whale': 12,
 'dusky_dolphin': 13,
 'long_finned_pilot_whale': 14,
 'sei_whale': 15,
 'spinner_dolphin': 16,
 'cuviers_beaked_whale': 17,
 'spotted_dolphin': 18,
 'brydes_whale': 19,
 'commersons_dolphin': 20,
 'white_sided_dolphin': 21,
 'rough_toothed_dolphin': 22,
 'pantropic_spotted_dolphin': 23,
 'pygmy_killer_whale': 24,
 'frasiers_dolphin': 25}

In [15]:
train_df['label'] = train_df['species'].apply(lambda x: label_map.get(x))

In [16]:
train_df.head()

Unnamed: 0,image,species,individual_id,path,class,width,height,split,label
0,00021adfb725ed.jpg,melon_headed_whale,cadddb1636b9,data/train_images/00021adfb725ed.jpg,whale,804,671,Train,0
1,000562241d384d.jpg,humpback_whale,1a71fbb72250,data/train_images/000562241d384d.jpg,whale,3504,2336,Train,1
2,0007c33415ce37.jpg,false_killer_whale,60008f293a2b,data/train_images/0007c33415ce37.jpg,whale,3599,2399,Train,2
3,0007d9bca26a99.jpg,bottlenose_dolphin,4b00fe572063,data/train_images/0007d9bca26a99.jpg,dolphin,3504,2336,Train,3
4,00087baf5cef7a.jpg,humpback_whale,8e5253662392,data/train_images/00087baf5cef7a.jpg,whale,3599,2699,Train,1


## Data Preparation

In [17]:
transformations = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    transforms.Resize((32,32)),
])

In [18]:
batch_size = 10
number_of_labels = 26

In [20]:
# def load_split_train_test

In [21]:
class WhaleDataset(Dataset):
    def __init__(self, df: pd.DataFrame, image_dir: str, transform: None):
        self.df = df
        self.image_dir = image_dir
        self.image_paths = self.df.path
        self.labels = self.df.label
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, item):
        image_path = self.image_paths.iloc[item]
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = self.transform(image)
        label = self.labels.iloc[item]

        return image, label


In [22]:
valid_data_split = 0.1

In [23]:
valid_df = train_df.sample(frac=valid_data_split, replace=False, random_state=1).copy()
train_df = train_df[~train_df['image'].isin(valid_df['image'])].copy()

In [24]:
print(train_df.shape)
print(valid_df.shape)

(45930, 9)
(5103, 9)


In [25]:
train_df.reset_index(drop=True, inplace=True)
valid_df.reset_index(drop=True, inplace=True)

In [26]:
training_dataset = WhaleDataset(df=train_df, image_dir=train_dir, transform=transformations)

In [27]:
validation_dataset = WhaleDataset(df=valid_df, image_dir=train_dir, transform=transformations)

In [28]:
len(training_dataset)

45930

In [29]:
len(validation_dataset)

5103

In [30]:
dataset_dict = {"train": training_dataset, "val": validation_dataset}

In [31]:
train_loader = DataLoader(training_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
valid_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

In [32]:
len(train_loader)

4593

In [33]:
len(valid_loader)

511

## Model steup

In [34]:
class CnnModel(nn.Module):
    def __init__(self):
        super(CnnModel, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=5, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(12)
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=5, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(12)
        self.pool = nn.MaxPool2d(2,2)
        self.conv4 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=5, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(24)
        self.conv5 = nn.Conv2d(in_channels=24, out_channels=24, kernel_size=5, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2d(24)
        self.fc1 = nn.Linear(24*10*10, 26)

    def forward(self, input):
        output = nn.functional.relu(self.bn1(self.conv1(input)))
        output = nn.functional.relu(self.bn2(self.conv2(output)))
        output = self.pool(output)
        output = nn.functional.relu(self.bn4(self.conv4(output)))
        output = nn.functional.relu(self.bn5(self.conv5(output)))
        output = output.view(-1, 24*10*10)
        output = self.fc1(output)

        return output

In [35]:
model = CnnModel()

In [36]:
loss_function = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.001)

In [37]:
# Function to save the model
def save_model():
    path = "./CnnModel.pth"
    torch.save(model.state_dict(), path)

In [38]:
torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

device(type='cuda', index=0)

In [39]:
# Function to test the model with the validation dataset and print the accuracy for the test images
def vaild_accuracy():

    model.eval()
    accuracy = 0.0
    total = 0.0
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    with torch.no_grad():
        for data in valid_loader:
            images, labels = data
            # run the model on the test set to predict labels
            images = Variable(images.to(device))
            labels = Variable(labels.to(device))
            outputs = model(images)

            # the label with the highest energy will be our prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            accuracy += (predicted == labels).sum().item()

    # compute the accuracy over all test images
    accuracy = (100 * accuracy / total)
    return accuracy

In [40]:
# Training function. We simply have to loop over our data iterator and feed the inputs to the network and optimize.
def train(num_epochs):

    best_accuracy = 0.0

    # Define your execution device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("The model will be running on", device, "device")
    # Convert model parameters and buffers to CPU or Cuda
    model.to(device)

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        running_acc = 0.0

        for i, (images, labels) in enumerate(train_loader, 0):

            # get the inputs
            images = Variable(images.to(device))
            labels = Variable(labels.to(device))

            # zero the parameter gradients
            optimizer.zero_grad()
            # predict classes using images from the training set
            outputs = model(images)
            # compute the loss based on model output and real labels
            loss = loss_function(outputs, labels)
            # backpropagation the loss
            loss.backward()
            # adjust parameters based on the calculated gradients
            optimizer.step()

            # Let's print statistics for every 1,000 images
            running_loss += loss.item()     # extract the loss value
            if i % 1000 == 999:
                # print every 1000 (twice per epoch)
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 1000))
                # zero the loss
                running_loss = 0.0

        # Compute and print the average accuracy fo this epoch when tested over all 10000 test images
        accuracy = vaild_accuracy()
        print('For epoch', epoch + 1,'the test accuracy over the whole test set is %d %%' % accuracy)

        # we want to save the model if the accuracy is the best
        if accuracy > best_accuracy:
            save_model()
            best_accuracy = accuracy

In [41]:
num_epochs = 10

In [None]:
train(num_epochs=num_epochs)

The model will be running on cuda:0 device
[1,  1000] loss: 1.653
[1,  2000] loss: 1.297
[1,  3000] loss: 1.193
[1,  4000] loss: 1.122
For epoch 1 the test accuracy over the whole test set is 66 %
[2,  1000] loss: 1.079
[2,  2000] loss: 1.001
[2,  3000] loss: 0.975
[2,  4000] loss: 0.937
For epoch 2 the test accuracy over the whole test set is 70 %
[3,  1000] loss: 0.906
[3,  2000] loss: 0.886
[3,  3000] loss: 0.870
[3,  4000] loss: 0.847
For epoch 3 the test accuracy over the whole test set is 72 %
