In [1]:
 !pip install "opencv-python-headless<4.3"



In [2]:
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
import time
import copy
import cv2
from sklearn.metrics import roc_curve, auc, roc_auc_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

In [3]:
df = pd.read_csv("Data/features_30_sec.csv")
df = df[['filename','label']]

In [4]:
df = df[df['filename'] != "jazz.00054.wav"]

In [5]:
df = df.reset_index()

In [6]:
df.pop('index')

0        0
1        1
2        2
3        3
4        4
      ... 
994    995
995    996
996    997
997    998
998    999
Name: index, Length: 999, dtype: int64

In [7]:
class_name = {}
n = 0
for i in set(df['label']):
    class_name[i] = n
    n+=1
num_classes = n

In [8]:
class_name

{'reggae': 0,
 'country': 1,
 'blues': 2,
 'metal': 3,
 'disco': 4,
 'pop': 5,
 'hiphop': 6,
 'rock': 7,
 'jazz': 8,
 'classical': 9}

In [9]:
df['label'] = df['label'].map(class_name)

In [10]:
for i in range(len(df)):
    temp = df['filename'][i].split(".")
    df['filename'][i] = "Data/images_original/" + temp[0] + "/" + temp[0] + temp[1] + ".png"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [11]:
train, test = train_test_split(df, test_size=0.30, random_state=42, stratify = df['label'])
test, val = train_test_split(test, test_size=0.50, random_state=42, stratify = test['label'])

In [12]:
dataset_sizes = {'train': len(train), 'test': len(test), 'val': len(val)}

In [13]:
class GenreDataset(Dataset):
    """Genre dataset."""

    def __init__(self, csv_file, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.csv = csv_file
        self.transform = transform

    def __len__(self):
        return len(self.csv)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = self.csv.iloc[idx, 0]
        image = cv2.imread(img_name,cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = np.expand_dims(image, axis=-1)
        details = self.csv.iloc[idx, 1:]
        sample = {'image': image, 'label': details[0]}

        if self.transform:
            sample = self.transform(sample)

        return sample

In [14]:
class PreProcessing(object):

    def __init__(self):
        pass

    def __call__(self, sample):
        image, turtle_id = sample['image'], sample['label']
#         h, w = image.shape[:2]
        
        ### ADD PREPROCESSING CODE HERE
#         image = np.array([image])
        
        return [np.transpose(image, (2, 1, 0)), turtle_id]

In [15]:
train_transformed_dataset = GenreDataset(csv_file=train,
                                               transform=transforms.Compose([
                                               PreProcessing()
                                           ]))
test_transformed_dataset = GenreDataset(csv_file=test,
                                               transform=transforms.Compose([
                                               PreProcessing()
                                           ]))
val_transformed_dataset = GenreDataset(csv_file=val,
                                               transform=transforms.Compose([
                                               PreProcessing()
                                           ]))

In [16]:
dataloaders = {'train' : DataLoader(train_transformed_dataset, batch_size=4,
                        shuffle=True, num_workers=0),
              'test' : DataLoader(test_transformed_dataset, batch_size=4,
                        shuffle=True, num_workers=0),
              'val' : DataLoader(val_transformed_dataset, batch_size=4,
                        shuffle=True, num_workers=0)}

In [17]:

from torchvision.utils import make_grid

In [18]:
def show_images(images, nmax=64):
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.set_xticks([]); ax.set_yticks([])
    ax.imshow(make_grid((images[0].detach()[:nmax]), nrow=8).permute(1, 2, 0))
def show_batch(dl, nmax=64):
    for images in dl:
        print(images[1].shape)
        show_images(images, nmax)
        break

In [19]:
def show_pics(image):
    print(image.shape)
    plt.imshow(image)
    plt.pause(0.001)  # pause a bit so that plots are updated


In [20]:
next(iter(train_transformed_dataset[0])).shape

(1, 339, 221)

In [21]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [22]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
#                 inputs = inputs.type(torch.DoubleTensor)
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs.float())
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

### Need to define our model below

In [23]:
import torch
import torch.nn as nn

def conv(ni, nf, ks=3, stride=1, bias=False):
    return nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks//2, bias=bias)

def conv_layer(ni, nf, ks=3, stride=1,act=True):
    bn = nn.BatchNorm2d(nf)
    layers = [conv(ni, nf, ks, stride=stride), bn]
    act_fn = nn.ReLU(inplace=True)
    if act: layers.append(act_fn)
    return nn.Sequential(*layers)

class ResBlock(nn.Module):
    def __init__(self, nf):
        super().__init__()
        self.conv1 = conv_layer(nf, nf)
        self.conv2 = conv_layer(nf, nf)
    def forward(self, x):
        return x + self.conv2(self.conv1(x))

def conv_layer_averpl(ni, nf):
    aver_pl = nn.AvgPool2d(kernel_size=2, stride=2)
    return nn.Sequential(conv_layer(ni, nf), aver_pl)

model_ft = nn.Sequential(
    conv_layer_averpl(1, 64),
    ResBlock(64),
    conv_layer_averpl(64, 64),
    ResBlock(64),
    conv_layer_averpl(64, 128),
    ResBlock(128),
    conv_layer_averpl(128, 256),
    ResBlock(256),
    conv_layer_averpl(256, 512),
    ResBlock(512),
    nn.AdaptiveAvgPool2d((2,2)),
    nn.Flatten(),
    nn.Linear(2048, 40),
    nn.Linear(40, 10)
)

In [24]:
# model_ft = models.resnet18(pretrained=True) ### Can change this to try out other models available 
# num_ftrs = model_ft.fc.in_features
# Here the size of each output sample is set to 2.
# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
# model_ft.fc = nn.Linear(num_ftrs, 10)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [25]:
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                           num_epochs=20)
# torch.save(model_ft.state_dict(),"weights.pth")

Epoch 0/19
----------
train Loss: 2.1700 Acc: 0.2546
val Loss: 1.8724 Acc: 0.3200

Epoch 1/19
----------
train Loss: 1.9320 Acc: 0.3004
val Loss: 3.4092 Acc: 0.2333

Epoch 2/19
----------
train Loss: 1.6450 Acc: 0.4077
val Loss: 1.6724 Acc: 0.4800

Epoch 3/19
----------
train Loss: 1.5858 Acc: 0.4378
val Loss: 1.5381 Acc: 0.4600

Epoch 4/19
----------
train Loss: 1.4417 Acc: 0.4964
val Loss: 1.5954 Acc: 0.5133

Epoch 5/19
----------
train Loss: 1.4035 Acc: 0.4936
val Loss: 1.5124 Acc: 0.4667

Epoch 6/19
----------
train Loss: 1.2555 Acc: 0.5637
val Loss: 1.7842 Acc: 0.5200

Epoch 7/19
----------
train Loss: 1.0396 Acc: 0.6309
val Loss: 1.0671 Acc: 0.6267

Epoch 8/19
----------
train Loss: 0.9263 Acc: 0.6924
val Loss: 1.0247 Acc: 0.6467

Epoch 9/19
----------
train Loss: 0.9069 Acc: 0.6681
val Loss: 1.2370 Acc: 0.5667

Epoch 10/19
----------
train Loss: 0.8753 Acc: 0.6953
val Loss: 1.1681 Acc: 0.5933

Epoch 11/19
----------
train Loss: 0.8485 Acc: 0.7053
val Loss: 1.0388 Acc: 0.6733

Ep

In [26]:
torch.save(model_ft.state_dict(),"weights.pth")

In [27]:
import torch
import torch.nn as nn

def conv(ni, nf, ks=3, stride=1, bias=False):
    return nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks//2, bias=bias)

def conv_layer(ni, nf, ks=3, stride=1,act=True):
    bn = nn.BatchNorm2d(nf)
    layers = [conv(ni, nf, ks, stride=stride), bn]
    act_fn = nn.ReLU(inplace=True)
    if act: layers.append(act_fn)
    return nn.Sequential(*layers)

class ResBlock(nn.Module):
    def __init__(self, nf):
        super().__init__()
        self.conv1 = conv_layer(nf, nf)
        self.conv2 = conv_layer(nf, nf)
    def forward(self, x):
        return x + self.conv2(self.conv1(x))

def conv_layer_averpl(ni, nf):
    aver_pl = nn.AvgPool2d(kernel_size=2, stride=2)
    return nn.Sequential(conv_layer(ni, nf), aver_pl)

model_ft = nn.Sequential(
    conv_layer_averpl(1, 64),
    ResBlock(64),
    conv_layer_averpl(64, 64),
    ResBlock(64),
    conv_layer_averpl(64, 128),
    ResBlock(128),
    conv_layer_averpl(128, 256),
    ResBlock(256),
    conv_layer_averpl(256, 512),
    ResBlock(512),
    nn.AdaptiveAvgPool2d((2,2)),
    nn.Flatten(),
    nn.Linear(2048, 40),
    nn.Linear(40, 10)
)

In [28]:
model_ft.load_state_dict(torch.load("weights.pth"))
model_ft = model_ft.to(device)

In [29]:
criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [30]:
def test_model(model, criterion, optimizer, scheduler):

    model.eval()   # Set model to evaluate mode

    t_output = []
    t_pred = []
    y_test = []
    top_k = []
    # Iterate over data.
    i = 1
    for inputs, labels in dataloaders['test']:
        
        inputs = inputs.to(device)
        labels = labels.to(device)
        y_test.append(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward
        # track history if only in train
        with torch.set_grad_enabled(True):
            outputs = model(inputs.float())
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            t_output.append(outputs)
            t_pred.append(preds)
            temp1, temp2 = outputs.topk(5)
            top_k.append(temp2)

    y_test = torch.cat(y_test).cpu().detach().numpy() 
    y_test_num = torch.cat(t_pred).cpu().detach().numpy() 
    y_pred = torch.cat(top_k).cpu().detach().numpy() 
    print('\nConfusion Matrix')
    conf_mt = confusion_matrix(y_test_num, y_test)
    print(conf_mt)
    plt.matshow(conf_mt)
    plt.show()
    print('\nClassification Report')
    print(classification_report(y_test_num, y_test))

In [31]:
test_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler)

RuntimeError: CUDA out of memory. Tried to allocate 74.00 MiB (GPU 0; 8.00 GiB total capacity; 6.34 GiB already allocated; 52.79 MiB free; 6.46 GiB reserved in total by PyTorch)