# Preamble

In [6]:
import os
import sys
# Make the project's `src` directory (three levels above the working directory)
# importable from this notebook, adding it to sys.path at most once.
module_path = os.path.abspath('../../../src/')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [1]:
# json
import json

# Pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchaudio

import matplotlib.pyplot as plt
import librosa

import librosa.display
import IPython.display as ipd

import numpy as np
import pickle
import random

In [2]:
import torch
import librosa
from torch_specinv import griffin_lim
from torch_specinv.metrics import spectral_convergence as SC

from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt

In [3]:
%load_ext autoreload

In [7]:
from tqdm import tqdm
from sklearn import metrics

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchaudio

from classification.trainer.GeneralPLModule import GeneralPLModule

In [8]:
# Choose the compute device: the GPU when it is both requested and present,
# otherwise fall back to the CPU.
use_cuda = True
print("CUDA Available: ", torch.cuda.is_available())
if use_cuda and torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

CUDA Available:  True


# Load data

In [9]:
from datasets.datasethandler import DatasetHandler 
# Notebook-wide handler instance; later cells call datasetHandler.load(model, <split>).
datasetHandler = DatasetHandler()

# Validating

In [10]:
def reportScore(y_true, y_pred):
    """Print accuracy, precision, recall and F1-score for the given predictions.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels, aligned with ``y_true``.

    Each score is computed with the matching ``sklearn.metrics`` function and
    written to stdout on its own tab-indented line.
    """
    scorers = (
        ("\tAccuracy:\t", metrics.accuracy_score),
        ("\tPrecision:\t", metrics.precision_score),
        ("\tRecall:   \t", metrics.recall_score),
        ("\tF1-score:\t", metrics.f1_score),
    )
    for label, scorer in scorers:
        print(label + str(scorer(y_true, y_pred)))

In [11]:
def validate(model):
    """Score ``model`` on its validation split and print the metrics.

    The model is switched to eval mode and fed the "validation" dataloader one
    sample at a time (batch_size=1, unshuffled) with gradients disabled. The
    index of the largest output along dim 1 is taken as the prediction, and the
    collected labels/predictions are passed to ``reportScore``.

    Args:
        model: object providing ``get_dataloader``, ``eval`` and ``__call__``
            on a batch dict with the keys 'audio' and 'label'.
    """
    loader = model.get_dataloader("validation", shuffle=False, batch_size=1)
    model.eval()

    targets, predictions = [], []
    with torch.no_grad():
        for batch in tqdm(loader):
            audio_on_device = batch['audio'].to(device)
            label_on_device = batch['label'].to(device)
            batch['audio'], batch['label'] = audio_on_device, label_on_device

            scores = model(batch)
            # torch.max over dim 1 returns (values, indices); the index is the class.
            _, winner = torch.max(scores.data, 1)
            predictions.append(winner.item())
            targets.append(batch['label'].item())

    reportScore(targets, predictions)

# Model definition

In [12]:
class Print(nn.Module):
    """Pass-through debugging layer: prints the shape of its input and returns it unchanged."""

    def forward(self, x):
        # Report the shape, then hand the very same object back to the caller.
        shape = x.shape
        print(shape)
        return x

In [38]:
class AblationModel(nn.Module):
    """CNN classifier that works on a spectrogram computed from the input audio.

    The audio is converted to a Spectrogram or MelSpectrogram, passed through four
    Conv2d -> BatchNorm2d -> PReLU -> MaxPool2d -> Dropout stages, averaged per
    feature map, and classified by a two-layer dense head into 2 classes.

    Args:
        hparams: dict with the keys
            "normal_spectrogram": truthy selects ``Spectrogram``, falsy ``MelSpectrogram``;
            "p_dropout": dropout probability used after every stage and in the head;
            "n_hidden": width of the hidden dense layer.
    """

    def __init__(self, hparams):
        super(AblationModel, self).__init__()
        self.datasets = {}

        self.normal_spectrogram = hparams["normal_spectrogram"]

        self.windowsize = 800
        # Put the window on the GPU only when one exists: an unconditional .cuda()
        # made the constructor fail on CPU-only machines.
        window_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.window = torch.hann_window(self.windowsize).to(window_device)

        p_dropout = hparams["p_dropout"]

        # Layers are created in the same order as before, so the indices inside
        # `self.convs` (and therefore the state_dict keys) are unchanged.
        layers = [nn.BatchNorm2d(1)]
        for in_channels, out_channels, kernel_size, stride in (
            (1, 10, 10, 2),
            (10, 20, 10, 1),
            (20, 40, 10, 1),
            (40, 80, 2, 1),
        ):
            layers += self._conv_stage(in_channels, out_channels, kernel_size, stride, p_dropout)
        self.convs = nn.Sequential(*layers)

        self.dense = nn.Sequential(
            nn.Linear(80, hparams["n_hidden"]),
            nn.PReLU(),
            nn.Dropout(p=p_dropout, inplace=False),
            nn.Linear(hparams["n_hidden"], 2)
        )

    @staticmethod
    def _conv_stage(in_channels, out_channels, kernel_size, stride, p_dropout):
        """Return the layers of one Conv2d -> BatchNorm2d -> PReLU -> MaxPool2d -> Dropout stage."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride),
            nn.BatchNorm2d(out_channels),
            nn.PReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(p=p_dropout, inplace=False),
        ]

    def forward(self, batch):
        """Classify a batch of audio.

        Args:
            batch: mapping whose 'audio' entry is the input tensor
                (assumes a (batch, samples) waveform - TODO confirm against the dataset).

        Returns:
            Log-probabilities over the 2 classes, one row per batch element.
        """
        x = batch['audio']

        if self.normal_spectrogram:
            transform = torchaudio.transforms.Spectrogram()
        else:
            transform = torchaudio.transforms.MelSpectrogram()
        # Follow the input's device instead of hard-coding CUDA, so the model
        # also runs when the batch lives on the CPU.
        x = transform.to(x.device)(x)

        # Add the single channel dimension expected by Conv2d.
        x = x.unsqueeze(1).float()
        x = self.convs(x)

        # Flatten the spatial dimensions and average them away (global average
        # pooling), leaving one value per feature map.
        x = x.view(x.shape[0], x.shape[1], -1)
        x = F.avg_pool1d(x, kernel_size=x.size()[2:]).squeeze(2)

        x = self.dense(x)
        return F.log_softmax(x, dim=1)

class AblationModelPLModule(GeneralPLModule):
    """GeneralPLModule wrapper whose underlying network is an ``AblationModel``."""

    def __init__(self, hparams):
        # The base class receives the hyperparameters first; the network is then
        # built from the same dict.
        super().__init__(hparams)
        self.model = AblationModel(hparams)

    def dataset_info(self):
        """Return ``(dataset_type, dataset_params)`` describing the data this module expects."""
        return {"sample_rate": 8000}, {"fixed_padding": True}

In [39]:
# Smoke test: one forward pass with normal_spectrogram=True; the result is discarded.
# NOTE(review): `test` is defined in a later cell (under "Training"), so on a fresh
# kernel this cell raises NameError unless that cell has been run first.
_ = test(normal_spectrogram=True)

In [37]:
# NOTE(review): identical to the call in the previous cell, and it likewise depends
# on `test`, which is only defined in a later cell.
_ = test(normal_spectrogram=True)

# Training

In [15]:
def test(normal_spectrogram=True):
    """Smoke-test the ablation module on a single training batch.

    Builds an ``AblationModelPLModule`` with fixed hyperparameters, loads the
    training and validation datasets through ``datasetHandler``, and runs the
    first training batch (batch size 32) through the model.

    Args:
        normal_spectrogram: stored in the hyperparameters under "normal_spectrogram".

    Returns:
        The model's output for that first batch.
    """
    hparams = dict(
        batch_size=24,
        learning_rate=0.002,
        weight_decay=0.01,
        p_dropout=0.1,
        n_hidden=100,
        lr_decay=0.95,
        normal_spectrogram=normal_spectrogram,
    )

    model = AblationModelPLModule(hparams)
    model = model.to(device)
    model.float()

    for split in ("training", "validation"):
        datasetHandler.load(model, split)

    first_batch = next(iter(model.get_dataloader("training", batch_size=32)))
    first_batch['audio'] = first_batch['audio'].to(device)

    return model(first_batch)

In [148]:
from classification.models.AblationModel import AblationModelPLModule

In [150]:
# Train the ablation model for 12 epochs, validating after every epoch.
# False is forwarded to the model through hparams["normal_spectrogram"].
normal_spectrogram = False

# Release cached GPU memory left over from earlier cells before building a new model.
torch.cuda.empty_cache()

hparams = {
    "batch_size": 24,
    "learning_rate": 0.002,
    "weight_decay": 0.01,
    "lr_decay": 0.95,
    "n_hidden": 20,
    "p_dropout": 0.1,
    "normal_spectrogram": normal_spectrogram
}

# NOTE(review): the preceding cell imports AblationModelPLModule from
# classification.models.AblationModel, so this presumably instantiates the packaged
# class rather than the one defined earlier in this notebook - confirm.
model = AblationModelPLModule(hparams).to(device)
model.float()

datasetHandler.load(model, "training")
datasetHandler.load(model, "validation")

criterion = nn.CrossEntropyLoss()
# NOTE(review): only the learning rate is passed to the optimizer; the
# "weight_decay" and "lr_decay" entries of hparams are not used in this cell.
optimizer = optim.Adam(model.parameters(), lr=hparams['learning_rate'])

for epoch in range(12):  # one pass over the training data per epoch

    running_loss = 0.0
    for i, data in enumerate(model.get_dataloader("training", batch_size=hparams['batch_size'])):
        # Move the batch to the same device as the model.
        data['audio'], data['label'] = data['audio'].to(device), data['label'].to(device)

        # validate() switches the model to eval mode, so training mode is re-enabled here.
        model.train()
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(data)

        loss = criterion(outputs, data['label'])
        loss.backward()
        optimizer.step()

        # track statistics
        running_loss += loss.item()
    #print(outputs[:10])

    # Mean training loss per batch for this epoch (i is the last batch index).
    print('[%d] loss: %.3f' % (epoch + 1, running_loss / (i+1)))
    validate(model)

print('Finished Training')

# NOTE(review): this repeats the validation already run at the end of the last epoch.
print("Validate:")
validate(model)

  2%|▏         | 42/1687 [00:00<00:03, 412.45it/s]

[1] loss: 0.514


100%|██████████| 1687/1687 [00:04<00:00, 404.58it/s]


	Accuracy:	0.7747480735032602
	Precision:	0.862012987012987
	Recall:   	0.6428571428571429
	F1-score:	0.7364771151178918


  3%|▎         | 43/1687 [00:00<00:03, 423.84it/s]

[2] loss: 0.456


100%|██████████| 1687/1687 [00:04<00:00, 406.90it/s]


	Accuracy:	0.7783046828689982
	Precision:	0.8804713804713805
	Recall:   	0.6331719128329297
	F1-score:	0.7366197183098592


  2%|▏         | 41/1687 [00:00<00:04, 408.19it/s]

[3] loss: 0.432


100%|██████████| 1687/1687 [00:04<00:00, 406.17it/s]


	Accuracy:	0.7860106698280972
	Precision:	0.8881469115191987
	Recall:   	0.6440677966101694
	F1-score:	0.7466666666666666


  2%|▏         | 42/1687 [00:00<00:04, 410.33it/s]

[4] loss: 0.411


 38%|███▊      | 633/1687 [00:01<00:02, 411.04it/s]


KeyboardInterrupt: 

In [146]:
# Re-run validation on whatever `model` currently holds in the notebook namespace.
validate(model)

100%|██████████| 1687/1687 [00:03<00:00, 529.04it/s]

	Accuracy:	0.8043864848844102
	Precision:	0.918918918918919
	Recall:   	0.6585956416464891
	F1-score:	0.767277856135402





In [None]:
#model_state_dict_path = "/nfs/students/summer-term-2020/project-4/data/models/SpectrogramBasedCNN.pt"
#torch.save(model.state_dict(), model_state_dict_path)

# Backprop: gradient of the loss with respect to the input audio

In [25]:
# Hyperparameters for the CRNN model used in the following cells.
hparams = {
    "batch_size": 24,
    "learning_rate": 0.002,
    "weight_decay": 0.01,
    "lr_decay": 1
}

# Use the notebook-wide `device` (which falls back to the CPU) instead of a
# hard-coded .cuda(), matching how the other models in this notebook are placed.
# NOTE(review): CRNNPLModule is not imported in any cell visible here - confirm
# where it comes from before running on a fresh kernel.
model = CRNNPLModule(hparams).to(device)

In [26]:
# Load the training and validation splits for the current `model` via the shared handler.
datasetHandler.load(model, "training")
datasetHandler.load(model, "validation")

In [27]:
# Take the first training batch (batch size 32) and move audio and labels to `device`.
x = next(iter(model.get_dataloader("training", batch_size=32)))
x['audio'], x['label'] = x['audio'].to(device), x['label'].to(device)

In [29]:
# Compute the gradient of the loss with respect to the input audio.
model.eval()
model.zero_grad()
# Track gradients on the input itself so backward() populates x['audio'].grad.
x['audio'].requires_grad_()

outputs = model(x)

loss = nn.CrossEntropyLoss()(outputs, x['label'])
loss.backward()
# Gradient of the loss with respect to every element of the input audio.
print(x['audio'].grad)

torch.Size([32, 481489])
torch.Size([32, 9, 128])
torch.Size([9, 32, 128])
torch.Size([32, 64])
torch.Size([32, 2])
tensor([[-9.9304e-08, -1.9802e-07, -1.9548e-07,  ..., -4.8839e-11,
         -4.5564e-11, -2.2173e-11],
        [-1.1339e-10, -2.2373e-10, -2.1470e-10,  ..., -8.9343e-11,
         -7.7258e-11, -3.6642e-11],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-7.8451e-09, -1.5406e-08, -1.3339e-08,  ..., -2.0062e-13,
         -2.0022e-13, -1.0031e-13],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.7652e-08, -3.4504e-08, -3.2775e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]], device='cuda:0')


# Debugging dataset handler

In [3]:
# Load a saved CRNN file from the shared NFS location (absolute, machine-specific path).
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted
# source. This also rebinds `x`, which earlier cells use for a data batch.
x = torch.load("/nfs/students/summer-term-2020/project-4/data/models/crnn.pth")

In [None]:
# Fresh DatasetHandler for the debugging cells below; rebinds the notebook-level name.
datasetHandler = DatasetHandler()

In [None]:
from classification.models.M5 import M5PLModule

# Minimal hyperparameters for exercising the dataset handler with the M5 model.
hparams = {
    "batch_size": 2,
    "learning_rate": 0.001,
    "weight_decay": 0,
    "lr_decay": 1
}

model = M5PLModule(hparams)

# NOTE(review): other cells call datasetHandler.load(model, <split>); confirm that
# load_datasets(model) is still part of the DatasetHandler API.
datasetHandler.load_datasets(model)

In [None]:
# Iterate the whole training loader once without using the batches
# (presumably to check that every batch can be loaded - confirm).
for x in model.get_dataloader("training", batch_size=32):
    pass

In [None]:
# Iterate the whole validation loader once without using the batches
# (presumably to check that every batch can be loaded - confirm).
for x in model.get_dataloader("validation", batch_size=32):
    pass

In [None]:
# Iterate the whole testing loader once without using the batches
# (presumably to check that every batch can be loaded - confirm).
for x in model.get_dataloader("testing", batch_size=32):
    pass