### Imports


In [1]:
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split, StratifiedKFold
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms

import time
import copy
import cv2
from sklearn.metrics import roc_curve, auc, roc_auc_score, classification_report, confusion_matrix 
import matplotlib.pyplot as plt

from itertools import product

In [2]:
torch.manual_seed(0)

<torch._C.Generator at 0x7fe6fc22fdd0>

### Load data

In [3]:
from google.colab import drive
drive.mount('/content/drive')

!unzip drive/MyDrive/Data_new.zip

drive.flush_and_unmount()

Mounted at /content/drive
Archive:  drive/MyDrive/Data_new.zip
   creating: Data_new/images_original/
   creating: Data_new/images_original/blues/
  inflating: Data_new/images_original/blues/blues00000.png  
  inflating: Data_new/images_original/blues/blues00001.png  
  inflating: Data_new/images_original/blues/blues00002.png  
  inflating: Data_new/images_original/blues/blues00003.png  
  inflating: Data_new/images_original/blues/blues00004.png  
  inflating: Data_new/images_original/blues/blues00005.png  
  inflating: Data_new/images_original/blues/blues00006.png  
  inflating: Data_new/images_original/blues/blues00007.png  
  inflating: Data_new/images_original/blues/blues00008.png  
  inflating: Data_new/images_original/blues/blues00009.png  
  inflating: Data_new/images_original/blues/blues00010.png  
  inflating: Data_new/images_original/blues/blues00011.png  
  inflating: Data_new/images_original/blues/blues00012.png  
  inflating: Data_new/images_original/blues/blues00013.png  

In [9]:
df = pd.read_csv("features_30_sec.csv")
df = df[['filename','label']]

In [10]:
df = df[df['filename'] != "jazz.00054.wav"]
df = df.reset_index()
df.pop('index')

0        0
1        1
2        2
3        3
4        4
      ... 
994    994
995    995
996    996
997    997
998    998
Name: index, Length: 999, dtype: int64

In [11]:
class_name = {}
n = 0
for i in df['label'].unique():
    class_name[i] = n
    n+=1
num_classes = n

In [13]:
df['label'] = df['label'].map(class_name)

In [12]:
for i in range(len(df)):
    temp = df['filename'][i].split(".")
    df['filename'][i] = "Data_new/images_original/" + temp[0] + "/" + temp[0] + temp[1] + ".png"

In [14]:
train, test = train_test_split(df, test_size=0.20, random_state=42, stratify = df['label'])
test, val = train_test_split(test, test_size=0.50, random_state=42, stratify = test['label'])

In [15]:
dataset_sizes = {'train': len(train), 'test': len(test), 'val': len(val)}
print(dataset_sizes)

{'train': 799, 'test': 100, 'val': 100}


In [16]:
class GenreDataset(Dataset):
    """Genre dataset."""

    def __init__(self, csv_file, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.csv = csv_file
        self.transform = transform

    def __len__(self):
        return len(self.csv)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = self.csv.iloc[idx, 0]
        image = cv2.imread(img_name,cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = np.expand_dims(image, axis=-1)
        details = self.csv.iloc[idx, 1:]
        sample = {'image': image, 'label': details[0]}

        if self.transform:
            sample = self.transform(sample)

        return sample

In [17]:
class PreProcessing(object):

    def __init__(self):
        pass

    def __call__(self, sample):
        image, turtle_id = sample['image'], sample['label']
        h, w = image.shape[:2]
        
        ### ADD PREPROCESSING CODE HERE
        
        return [torch.Tensor(image.transpose(2,0,1)), turtle_id]

In [18]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

### Model

In [28]:
# image shape is (219, 338, 1)

class CustomCNN(nn.Module):
  def __init__(self):
    super().__init__()
    self.embed = nn.Sequential(
        self.conv3x3(1, 16),
        nn.MaxPool2d((3,3), (2,2)),

        self.conv3x3(16, 32),
        nn.MaxPool2d((2,3), (2,2)),

        self.conv3x3(32, 64),
        nn.MaxPool2d((5,6), (5,6)),

        self.conv3x3(64, 128),
        nn.MaxPool2d((6,6), (5,4)),

        nn.Flatten(),
        nn.Linear(2*128, 128),
        nn.ReLU()
        )

    self.classify = nn.Sequential(
        nn.Linear(128, 10)
        )

    self.apply(self._init_weights)

  def conv3x3(self, in_channels, out_channels):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU()
        ) 

  def forward(self, input):
    embedding = self.embed(input)
    output = self.classify(embedding)
    return output

  def _init_weights(self, layer):
    if isinstance(layer, (nn.Linear, nn.Conv2d)):
      nn.init.kaiming_normal_(layer.weight, nonlinearity='relu')

### Hyperparameter tuning

In [20]:
def get_optimizer(name, params, lr):
  if name=='Adam':
    return optim.Adam(params, lr)
  else:
    return optim.SGD(params, lr, momentum=0.9)

In [21]:
def reset_weights(m):
  '''
    Try resetting model weights to avoid weight leakage.
  '''
  for layer in m.children():
   if hasattr(layer, 'reset_parameters'):
    layer.reset_parameters()

In [22]:
def cross_validate(model, dataset, k_folds, hyperparams, num_epochs=10):
    results = {}
    criterion = nn.CrossEntropyLoss()

    for fold, (train_ids, test_ids) in enumerate(k_folds):
        print(f'FOLD {fold+1}')
        
        # Define data loaders for training and testing data in this fold
        train_subsampler = SubsetRandomSampler(train_ids)
        train_loader = DataLoader(dataset, batch_size=16, sampler = train_subsampler)
        test_subsampler = SubsetRandomSampler(test_ids)
        test_loader = DataLoader(dataset, batch_size=16, sampler = test_subsampler)
        
        # Reset weights for each fold
        model.apply(reset_weights)

        # Training configuration
        optimizer = get_optimizer(hyperparams['optimizer'], model.parameters(), hyperparams['lr'])
        scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

        # Train for given epochs
        total = len(train_ids)
        model.train()
        for epoch in range(num_epochs):
            epoch_loss, epoch_acc = 0.0, 0

            # Iterate over data
            for inputs, labels in train_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward
                with torch.set_grad_enabled(True):
                  outputs = model(inputs.float())
                  _, preds = torch.max(outputs, 1)
                  loss = criterion(outputs, labels)
                  # backward
                  loss.backward()
                  optimizer.step()

                scheduler.step()
                # statistics
                epoch_loss += loss.item() * inputs.size(0)
                epoch_acc += torch.sum(preds == labels.data)

            epoch_loss = epoch_loss/total
            epoch_acc = epoch_acc.double()/total
            if (epoch+1)%5 == 0: 
              print('Epoch {}/{} --- Train Loss: {:.3f} Acc: {:.3f}' \
                    .format(epoch+1, num_epochs, epoch_loss, epoch_acc))

        # Evaluation for this fold
        correct, total = 0, len(test_ids)
        model.eval()
        with torch.no_grad():
          for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            outputs = model(inputs.float())
            _, preds = torch.max(outputs, 1)
            correct += torch.sum(preds == labels.data)

        correct = correct.cpu().detach().numpy()
        eval_acc = correct/total
        results[fold+1] = {'train':epoch_acc, 'eval':eval_acc}
      
    # Print results for all folds
    print('\nCross validation results')
    avg = 0.0
    for key, value in results.items():
      print('Fold {}: Train acc = {:.3f} Val acc = {:.3f}'.format(key, value['train'], value['eval']))
      avg += value['eval']
    avg = avg/len(results)
    print(hyperparams)
    print('Average val acc: {:.3f} \n'.format(avg))
    return avg

In [23]:
# Hyperparameters
optimizer = ['SGD', 'Adam']
lr = [0.001, 0.0005, 0.0001]
num_filters = [(16,32,64,128),(32,64,128,256)]


In [24]:
hp_values = list(product(optimizer, lr))
hp = ['optimizer','lr']
hp_dict = [dict(zip(hp,i)) for i in hp_values]
hp_dict

[{'lr': 0.001, 'optimizer': 'SGD'},
 {'lr': 0.0005, 'optimizer': 'SGD'},
 {'lr': 0.0001, 'optimizer': 'SGD'},
 {'lr': 0.001, 'optimizer': 'Adam'},
 {'lr': 0.0005, 'optimizer': 'Adam'},
 {'lr': 0.0001, 'optimizer': 'Adam'}]

In [25]:
# Define the cross validator
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Transform dataset
hp_transformed_dataset = GenreDataset(csv_file=pd.concat([train, val]), 
                                      transform=transforms.Compose([PreProcessing()])
                                      )

In [29]:
model = CustomCNN()
torch.cuda.empty_cache() 
model = model.to(device)

hp_results = []

for hyperparams in hp_dict:
  print(hyperparams)
  k_folds = skf.split(hp_transformed_dataset.csv, hp_transformed_dataset.csv['label'])
  res = cross_validate(model, hp_transformed_dataset, k_folds, hyperparams)
  hp_results.append((hyperparams, res))

{'optimizer': 'SGD', 'lr': 0.001}
FOLD 1
Epoch 5/10 --- Train Loss: 2.274 Acc: 0.106
Epoch 10/10 --- Train Loss: 2.273 Acc: 0.103
FOLD 2
Epoch 5/10 --- Train Loss: 2.300 Acc: 0.161
Epoch 10/10 --- Train Loss: 2.298 Acc: 0.153
FOLD 3
Epoch 5/10 --- Train Loss: 2.276 Acc: 0.159
Epoch 10/10 --- Train Loss: 2.277 Acc: 0.171
FOLD 4
Epoch 5/10 --- Train Loss: 2.283 Acc: 0.161
Epoch 10/10 --- Train Loss: 2.284 Acc: 0.146
FOLD 5
Epoch 5/10 --- Train Loss: 2.276 Acc: 0.126
Epoch 10/10 --- Train Loss: 2.276 Acc: 0.132

Cross validation results
Fold 1: Train acc = 0.103 Val acc = 0.089
Fold 2: Train acc = 0.153 Val acc = 0.156
Fold 3: Train acc = 0.171 Val acc = 0.150
Fold 4: Train acc = 0.146 Val acc = 0.161
Fold 5: Train acc = 0.132 Val acc = 0.128
{'optimizer': 'SGD', 'lr': 0.001}
Average val acc: 0.137 

{'optimizer': 'SGD', 'lr': 0.0005}
FOLD 1
Epoch 5/10 --- Train Loss: 2.318 Acc: 0.100
Epoch 10/10 --- Train Loss: 2.314 Acc: 0.100
FOLD 2
Epoch 5/10 --- Train Loss: 2.319 Acc: 0.114
Epoch 10/

In [30]:
hp_results

[({'lr': 0.001, 'optimizer': 'SGD'}, 0.1368094351334575),
 ({'lr': 0.0005, 'optimizer': 'SGD'}, 0.10898199875853506),
 ({'lr': 0.0001, 'optimizer': 'SGD'}, 0.07788330229671012),
 ({'lr': 0.001, 'optimizer': 'Adam'}, 0.2702607076350093),
 ({'lr': 0.0005, 'optimizer': 'Adam'}, 0.24471135940409683),
 ({'lr': 0.0001, 'optimizer': 'Adam'}, 0.220235878336437)]