In [1]:
 !pip install "opencv-python-headless<4.3"

Collecting opencv-python-headless<4.3
  Downloading opencv_python_headless-4.2.0.34-cp37-cp37m-manylinux1_x86_64.whl (21.6 MB)
[K     |████████████████████████████████| 21.6 MB 1.6 MB/s 
Installing collected packages: opencv-python-headless
Successfully installed opencv-python-headless-4.2.0.34


In [2]:
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
import time
import copy
import cv2
from sklearn.metrics import roc_curve, auc, roc_auc_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
df = pd.read_csv("/content/drive/MyDrive/Data/features_30_sec.csv")
df = df[['filename','label']]

In [5]:
df = df[df['filename'] != "jazz.00054.wav"]

In [6]:
df = df.reset_index()

In [7]:
df.pop('index')

0        0
1        1
2        2
3        3
4        4
      ... 
994    995
995    996
996    997
997    998
998    999
Name: index, Length: 999, dtype: int64

In [8]:
class_name = {}
n = 0
for i in set(df['label']):
    class_name[i] = n
    n+=1
num_classes = n

In [9]:
class_name

{'blues': 3,
 'classical': 9,
 'country': 8,
 'disco': 5,
 'hiphop': 1,
 'jazz': 0,
 'metal': 7,
 'pop': 4,
 'reggae': 6,
 'rock': 2}

In [10]:
df['label'] = df['label'].map(class_name)

In [11]:
for i in range(len(df)):
    temp = df['filename'][i].split(".")
    df['filename'][i] = "Data/images_original/" + temp[0] + "/" + temp[0] + temp[1] + ".png"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [12]:
train, test = train_test_split(df, test_size=0.30, random_state=42, stratify = df['label'])
test, val = train_test_split(test, test_size=0.50, random_state=42, stratify = test['label'])

In [13]:
dataset_sizes = {'train': len(train), 'test': len(test), 'val': len(val)}

In [14]:
class GenreDataset(Dataset):
    """Genre dataset."""

    def __init__(self, csv_file, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.csv = csv_file
        self.transform = transform

    def __len__(self):
        return len(self.csv)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = self.csv.iloc[idx, 0]
        image = cv2.imread("/content/drive/MyDrive/" + img_name,cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = np.expand_dims(image, axis=-1)
        details = self.csv.iloc[idx, 1:]
        sample = {'image': image, 'label': details[0]}

        if self.transform:
            sample = self.transform(sample)

        return sample

In [15]:
class PreProcessing(object):

    def __init__(self):
        pass

    def __call__(self, sample):
        image, turtle_id = sample['image'], sample['label']
#         h, w = image.shape[:2]
        
        ### ADD PREPROCESSING CODE HERE
#         image = np.array([image])
        
        return [np.transpose(image, (2, 1, 0)), turtle_id]

In [16]:
train_transformed_dataset = GenreDataset(csv_file=train,
                                               transform=transforms.Compose([
                                               PreProcessing()
                                           ]))
test_transformed_dataset = GenreDataset(csv_file=test,
                                               transform=transforms.Compose([
                                               PreProcessing()
                                           ]))
val_transformed_dataset = GenreDataset(csv_file=val,
                                               transform=transforms.Compose([
                                               PreProcessing()
                                           ]))

In [46]:
dataloaders = {'train' : DataLoader(train_transformed_dataset, batch_size=8,
                        shuffle=True, num_workers=0),
              'test' : DataLoader(test_transformed_dataset, batch_size=4,
                        shuffle=True, num_workers=0),
              'val' : DataLoader(val_transformed_dataset, batch_size=4,
                        shuffle=True, num_workers=0)}

In [47]:

from torchvision.utils import make_grid

In [48]:
def show_images(images, nmax=64):
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.set_xticks([]); ax.set_yticks([])
    ax.imshow(make_grid((images[0].detach()[:nmax]), nrow=8).permute(1, 2, 0))
def show_batch(dl, nmax=64):
    for images in dl:
        print(images[1].shape)
        show_images(images, nmax)
        break

In [49]:
def show_pics(image):
    print(image.shape)
    plt.imshow(image)
    plt.pause(0.001)  # pause a bit so that plots are updated


In [50]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [51]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
#                 inputs = inputs.type(torch.DoubleTensor)
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs.float())
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

### Need to define our model below

In [75]:
# Only one Inception module 

import torch
import torch.nn as nn

def conv(ni, nf, ks=3, stride=1, pad=1, bias=False):
    return nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=pad, bias=bias)

def conv_layer(ni, nf, ks=3, stride=1, pad=1,act=True):
    bn = nn.BatchNorm2d(nf)
    layers = [conv(ni, nf, ks, stride, pad), bn]
    act_fn = nn.ReLU(inplace=True)
    if act: layers.append(act_fn)
    return nn.Sequential(*layers)

class InceptionModule(nn.Module):
    def __init__(self):
        super().__init__()
        self.branch1x1 = conv_layer(256*2, 64*2, ks=1, stride=1, pad=0)
        
        self.branch1x1_pool = conv_layer(256*2, 32*2, ks=1, stride=1, pad=0)
        self.branch1x1_final = conv_layer(32*2, 32*2, ks=1, stride=1, pad=0)
        
        self.branch3x3_init = conv_layer(256*2, 128*2, ks=3, stride=1, pad=1)
        self.branch3x3_final = conv_layer(128*2, 128*2, ks=3, stride=1, pad=1)
        
        self.branch5x5_init = conv_layer(256*2, 32*2, ks=5, stride=1, pad=2)
        self.branch5x5_final = conv_layer(32*2, 32*2, ks=5, stride=1, pad=2)
        
        self.relu = nn.ReLU()
        
    def forward(self, x):
        
        x1 = self.branch1x1(x)
        
        x2 = self.branch1x1_pool(x)
        x3 = self.branch1x1_final(x2)
        
        x4 = self.branch3x3_init(x)
        x5 = self.branch3x3_final(x4)
        
        x6 = self.branch5x5_init(x)
        x7 = self.branch5x5_final(x6)
        
        ans = [x1,x3,x5,x7]
        return self.relu(torch.cat(ans,1))

def conv_layer_averpl(ni, nf):
    aver_pl = nn.AvgPool2d(kernel_size=2, stride=2)
    return nn.Sequential(conv_layer(ni, nf), aver_pl)

model_ft = nn.Sequential(
    conv_layer_averpl(1, 128),
    conv_layer_averpl(128, 256),
    conv_layer_averpl(256, 512),
    InceptionModule(),
    nn.AdaptiveAvgPool2d((2,2)),
    nn.Flatten(),
    nn.Linear(2048, 40),
    nn.Linear(40, 10)
)

In [80]:
# 3 Inception modules with skip connections

import torch
import torch.nn as nn

def conv(ni, nf, ks=3, stride=1, pad=1, bias=False):
    return nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=pad, bias=bias)

def conv_layer(ni, nf, ks=3, stride=1, pad=1,act=True):
    bn = nn.BatchNorm2d(nf)
    layers = [conv(ni, nf, ks, stride, pad), bn]
    act_fn = nn.ReLU(inplace=True)
    if act: layers.append(act_fn)
    return nn.Sequential(*layers)

class InceptionModule(nn.Module):
    def __init__(self):
        super().__init__()
        self.branch1x1 = conv_layer(256, 64, ks=1, stride=1, pad=0)
        
        self.branch1x1_pool = conv_layer(256, 32, ks=1, stride=1, pad=0)
        self.branch1x1_final = conv_layer(32, 32, ks=1, stride=1, pad=0)
        
        self.branch3x3_init = conv_layer(256, 128, ks=3, stride=1, pad=1)
        self.branch3x3_final = conv_layer(128, 128, ks=3, stride=1, pad=1)
        
        self.branch5x5_init = conv_layer(256, 32, ks=5, stride=1, pad=2)
        self.branch5x5_final = conv_layer(32, 32, ks=5, stride=1, pad=2)
        
        self.relu = nn.ReLU()
        
    def forward(self, x):
        
        x1 = self.branch1x1(x)
        
        x2 = self.branch1x1_pool(x)
        x3 = self.branch1x1_final(x2)
        
        x4 = self.branch3x3_init(x)
        x5 = self.branch3x3_final(x4)
        
        x6 = self.branch5x5_init(x)
        x7 = self.branch5x5_final(x6)
        
        ans = [x1,x3,x5,x7]
        return self.relu(torch.cat(ans,1)) + x

def conv_layer_averpl(ni, nf):
    aver_pl = nn.AvgPool2d(kernel_size=2, stride=2)
    return nn.Sequential(conv_layer(ni, nf), aver_pl)

model_ft = nn.Sequential(
    conv_layer_averpl(1, 128),
    conv_layer_averpl(128, 256),
    InceptionModule(),
    InceptionModule(),
    InceptionModule(),
    nn.AdaptiveAvgPool2d((2,2)),
    nn.Flatten(),
    nn.Linear(1024, 40),
    nn.Linear(40, 10)
)

In [87]:
# 3 Inception module

import torch
import torch.nn as nn

def conv(ni, nf, ks=3, stride=1, pad=1, bias=False):
    return nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=pad, bias=bias)

def conv_layer(ni, nf, ks=3, stride=1, pad=1,act=True):
    bn = nn.BatchNorm2d(nf)
    layers = [conv(ni, nf, ks, stride, pad), bn]
    act_fn = nn.ReLU(inplace=True)
    if act: layers.append(act_fn)
    return nn.Sequential(*layers)

class InceptionModule_1(nn.Module):
    def __init__(self):
        super().__init__()
        self.branch1x1 = conv_layer(1, 32, ks=1, stride=1, pad=0)
        
        self.branch1x1_pool = conv_layer(1, 16, ks=1, stride=1, pad=0)
        self.branch1x1_final = conv_layer(16, 16, ks=1, stride=1, pad=0)
        
        self.branch3x3_init = conv_layer(1, 64, ks=3, stride=1, pad=1)
        self.branch3x3_final = conv_layer(64, 64, ks=3, stride=1, pad=1)
        
        self.branch5x5_init = conv_layer(1, 16, ks=5, stride=1, pad=2)
        self.branch5x5_final = conv_layer(16, 16, ks=5, stride=1, pad=2)
        
        self.relu = nn.ReLU()
        
    def forward(self, x):
        
        x1 = self.branch1x1(x)
        
        x2 = self.branch1x1_pool(x)
        x3 = self.branch1x1_final(x2)
        
        x4 = self.branch3x3_init(x)
        x5 = self.branch3x3_final(x4)
        
        x6 = self.branch5x5_init(x)
        x7 = self.branch5x5_final(x6)
        
        ans = [x1,x3,x5,x7]
        return self.relu(torch.cat(ans,1))

class InceptionModule_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.branch1x1 = conv_layer(128, 64, ks=1, stride=1, pad=0)
        
        self.branch1x1_pool = conv_layer(128, 32, ks=1, stride=1, pad=0)
        self.branch1x1_final = conv_layer(32, 32, ks=1, stride=1, pad=0)
        
        self.branch3x3_init = conv_layer(128, 128, ks=3, stride=1, pad=1)
        self.branch3x3_final = conv_layer(128, 128, ks=3, stride=1, pad=1)
        
        self.branch5x5_init = conv_layer(128, 32, ks=5, stride=1, pad=2)
        self.branch5x5_final = conv_layer(32, 32, ks=5, stride=1, pad=2)
        
        self.relu = nn.ReLU()
        
    def forward(self, x):
        
        x1 = self.branch1x1(x)
        
        x2 = self.branch1x1_pool(x)
        x3 = self.branch1x1_final(x2)
        
        x4 = self.branch3x3_init(x)
        x5 = self.branch3x3_final(x4)
        
        x6 = self.branch5x5_init(x)
        x7 = self.branch5x5_final(x6)
        
        ans = [x1,x3,x5,x7]
        return self.relu(torch.cat(ans,1))

class InceptionModule_3(nn.Module):
    def __init__(self):
        super().__init__()
        self.branch1x1 = conv_layer(256, 128, ks=1, stride=1, pad=0)
        
        self.branch1x1_pool = conv_layer(256, 64, ks=1, stride=1, pad=0)
        self.branch1x1_final = conv_layer(64, 64, ks=1, stride=1, pad=0)
        
        self.branch3x3_init = conv_layer(256, 256, ks=3, stride=1, pad=1)
        self.branch3x3_final = conv_layer(256, 256, ks=3, stride=1, pad=1)
        
        self.branch5x5_init = conv_layer(256, 64, ks=5, stride=1, pad=2)
        self.branch5x5_final = conv_layer(64, 64, ks=5, stride=1, pad=2)
        
        self.relu = nn.ReLU()
        
    def forward(self, x):
        
        x1 = self.branch1x1(x)
        
        x2 = self.branch1x1_pool(x)
        x3 = self.branch1x1_final(x2)
        
        x4 = self.branch3x3_init(x)
        x5 = self.branch3x3_final(x4)
        
        x6 = self.branch5x5_init(x)
        x7 = self.branch5x5_final(x6)
        
        ans = [x1,x3,x5,x7]
        return self.relu(torch.cat(ans,1))

def conv_layer_averpl(ni, nf):
    aver_pl = nn.AvgPool2d(kernel_size=2, stride=2)
    return nn.Sequential(conv_layer(ni, nf), aver_pl)

model_ft = nn.Sequential(
    InceptionModule_1(),
    InceptionModule_2(),
    InceptionModule_3(),
    nn.AdaptiveAvgPool2d((2,2)),
    nn.Flatten(),
    nn.Linear(2048, 40),
    nn.Linear(40, 10)
)

In [81]:
# model_ft = models.resnet18(pretrained=True) ### Can change this to try out other models available 
# num_ftrs = model_ft.fc.in_features
# Here the size of each output sample is set to 2.
# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
# model_ft.fc = nn.Linear(num_ftrs, 10)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [78]:
# 1 inception module
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                           num_epochs=20)
# torch.save(model_ft.state_dict(),"weights.pth")

Epoch 0/19
----------
train Loss: 2.0506 Acc: 0.2561
val Loss: 1.7607 Acc: 0.4200

Epoch 1/19
----------
train Loss: 1.7101 Acc: 0.3848
val Loss: 1.5906 Acc: 0.4333

Epoch 2/19
----------
train Loss: 1.5633 Acc: 0.4320
val Loss: 1.5800 Acc: 0.4667

Epoch 3/19
----------
train Loss: 1.4594 Acc: 0.4793
val Loss: 1.9752 Acc: 0.3133

Epoch 4/19
----------
train Loss: 1.3774 Acc: 0.5036
val Loss: 1.3997 Acc: 0.5400

Epoch 5/19
----------
train Loss: 1.3555 Acc: 0.5236
val Loss: 1.3856 Acc: 0.5533

Epoch 6/19
----------
train Loss: 1.2912 Acc: 0.5293
val Loss: 1.4866 Acc: 0.5267

Epoch 7/19
----------
train Loss: 1.1257 Acc: 0.6052
val Loss: 1.3187 Acc: 0.6000

Epoch 8/19
----------
train Loss: 1.1266 Acc: 0.6123
val Loss: 1.3144 Acc: 0.5533

Epoch 9/19
----------
train Loss: 1.0861 Acc: 0.6295
val Loss: 1.2741 Acc: 0.5667

Epoch 10/19
----------
train Loss: 1.0496 Acc: 0.6581
val Loss: 1.2642 Acc: 0.5867

Epoch 11/19
----------
train Loss: 1.0635 Acc: 0.6538
val Loss: 1.3163 Acc: 0.5800

Ep

In [79]:
torch.save(model_ft.state_dict(),"/content/drive/MyDrive/one_inception_weights.pth")

In [82]:
# 3 inception module with skip connections
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                           num_epochs=20)
# torch.save(model_ft.state_dict(),"weights.pth")

Epoch 0/19
----------
train Loss: 2.1270 Acc: 0.2117
val Loss: 1.8364 Acc: 0.3600

Epoch 1/19
----------
train Loss: 1.7387 Acc: 0.3591
val Loss: 2.2074 Acc: 0.2867

Epoch 2/19
----------
train Loss: 1.6429 Acc: 0.3991
val Loss: 1.8803 Acc: 0.4000

Epoch 3/19
----------
train Loss: 1.6193 Acc: 0.4034
val Loss: 1.6666 Acc: 0.4400

Epoch 4/19
----------
train Loss: 1.4760 Acc: 0.4592
val Loss: 2.0453 Acc: 0.3333

Epoch 5/19
----------
train Loss: 1.4376 Acc: 0.4764
val Loss: 1.8185 Acc: 0.3533

Epoch 6/19
----------
train Loss: 1.3874 Acc: 0.5050
val Loss: 1.5987 Acc: 0.4800

Epoch 7/19
----------
train Loss: 1.2134 Acc: 0.5508
val Loss: 1.3199 Acc: 0.5667

Epoch 8/19
----------
train Loss: 1.1750 Acc: 0.5851
val Loss: 1.2979 Acc: 0.5867

Epoch 9/19
----------
train Loss: 1.1452 Acc: 0.5994
val Loss: 1.3074 Acc: 0.5867

Epoch 10/19
----------
train Loss: 1.1062 Acc: 0.6037
val Loss: 1.3447 Acc: 0.6000

Epoch 11/19
----------
train Loss: 1.1423 Acc: 0.5980
val Loss: 1.3106 Acc: 0.5800

Ep

In [83]:
torch.save(model_ft.state_dict(),"/content/drive/MyDrive/inception_with_skip_weights.pth")

In [88]:
# model_ft = models.resnet18(pretrained=True) ### Can change this to try out other models available 
# num_ftrs = model_ft.fc.in_features
# Here the size of each output sample is set to 2.
# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
# model_ft.fc = nn.Linear(num_ftrs, 10)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [89]:
# 3 inception module with skip connections
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                           num_epochs=20)
# torch.save(model_ft.state_dict(),"weights.pth")

Epoch 0/19
----------


RuntimeError: ignored

In [None]:
torch.save(model_ft.state_dict(),"/content/drive/MyDrive/3_cons_inception_weights.pth")

In [None]:
model_ft.load_state_dict(torch.load("weights.pth"))
model_ft = model_ft.to(device)

In [None]:
criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [None]:
def test_model(model, criterion, optimizer, scheduler):

    model.eval()   # Set model to evaluate mode

    t_output = []
    t_pred = []
    y_test = []
    top_k = []
    # Iterate over data.
    i = 1
    for inputs, labels in dataloaders['test']:
        
        inputs = inputs.to(device)
        labels = labels.to(device)
        y_test.append(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward
        # track history if only in train
        with torch.set_grad_enabled(True):
            outputs = model(inputs.float())
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            t_output.append(outputs)
            t_pred.append(preds)
            temp1, temp2 = outputs.topk(5)
            top_k.append(temp2)

    y_test = torch.cat(y_test).cpu().detach().numpy() 
    y_test_num = torch.cat(t_pred).cpu().detach().numpy() 
    y_pred = torch.cat(top_k).cpu().detach().numpy() 
    print('\nConfusion Matrix')
    conf_mt = confusion_matrix(y_test_num, y_test)
    print(conf_mt)
    plt.matshow(conf_mt)
    plt.show()
    print('\nClassification Report')
    print(classification_report(y_test_num, y_test))

In [None]:
test_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler)

RuntimeError: CUDA out of memory. Tried to allocate 74.00 MiB (GPU 0; 8.00 GiB total capacity; 6.34 GiB already allocated; 52.79 MiB free; 6.46 GiB reserved in total by PyTorch)