# Preamble

In [1]:
# json
import json

# Pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchaudio

import matplotlib.pyplot as plt
import librosa

import librosa.display
import IPython.display as ipd

import numpy as np
import pickle
import random

In [2]:
import torch
import librosa
from torch_specinv import griffin_lim
from torch_specinv.metrics import spectral_convergence as SC

from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt

In [3]:
%load_ext autoreload

In [4]:
from tqdm import tqdm
from sklearn import metrics

In [5]:
# Pick the compute device: the GPU when it is both requested and present, else the CPU.
use_cuda = True
print("CUDA Available: ", torch.cuda.is_available())
if use_cuda and torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

CUDA Available:  True


In [6]:
import os
import sys
# Add the repository's `src` directory (three levels up from the working
# directory) to the import path; presumably this is where the `datasets` and
# `classification` packages imported by later cells live.
module_path = os.path.abspath('../../../src/')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

# Load data

In [7]:
from datasets.datasethandler import DatasetHandler 
# Shared handler instance; later cells call datasetHandler.load(model, split) on it.
datasetHandler = DatasetHandler()

# Validating

In [8]:
def reportScore(y_true, y_pred):
    """Print accuracy, precision, recall and F1-score for a set of predictions.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels, aligned element-wise with ``y_true``.

    Returns:
        None. The four scores are written to stdout, one per line.
    """
    # (line prefix, name of the scoring function in sklearn.metrics)
    report_rows = (
        ("\tAccuracy:\t", "accuracy_score"),
        ("\tPrecision:\t", "precision_score"),
        ("\tRecall:   \t", "recall_score"),
        ("\tF1-score:\t", "f1_score"),
    )
    for prefix, scorer_name in report_rows:
        score = getattr(metrics, scorer_name)(y_true, y_pred)
        print(prefix + str(score))

In [9]:
def validate(model):
    """Evaluate ``model`` on its validation split and print classification scores.

    The model is switched to eval mode (and left in it), every validation batch
    is run through it without gradient tracking, and the collected labels and
    arg-max predictions are passed to ``reportScore``.

    Works for any validation batch size: predictions and labels are flattened
    per batch rather than read with ``.item()``, which only accepts
    single-element tensors.

    Args:
        model: module exposing ``get_dataloader(split, shuffle=...)`` and
            callable on a batch dict with ``'audio'`` and ``'label'`` tensors.
            Outputs are expected to be per-class scores along dimension 1.

    Returns:
        None. Scores are printed by ``reportScore``.
    """
    valid_loader = model.get_dataloader("validation", shuffle=False)
    model.eval()

    y_true = []
    y_pred = []
    with torch.no_grad():
        for data in tqdm(valid_loader):
            # The batch dict is updated in place with tensors on the compute device.
            data['audio'], data['label'] = data['audio'].to(device), data['label'].to(device)
            outputs = model(data)

            # Index of the highest score per sample; flatten so batches of any size are handled.
            y_pred.extend(torch.max(outputs, 1)[1].reshape(-1).tolist())
            y_true.extend(data['label'].reshape(-1).tolist())

    reportScore(y_true, y_pred)

# Training

In [10]:
def test():
    """Smoke-test the CRNN: build it, load its data and run one training batch.

    Returns:
        The model output for a single batch of 32 training samples.
    """
    hparams = dict(
        batch_size=24,
        learning_rate=0.002,
        weight_decay=0.01,
        lr_decay=0.95,
    )

    model = CRNNPLModule(hparams).to(device)
    model.float()

    for split_name in ("training", "validation"):
        datasetHandler.load(model, split_name)

    # Only the audio is moved to the device; the label is not needed for a forward pass here.
    batch = next(iter(model.get_dataloader("training", batch_size=32)))
    batch['audio'] = batch['audio'].to(device)

    return model(batch)

In [11]:
from classification.models.CRNN import CRNNPLModule
# Smoke test: run test() once and discard the returned model output.
_ = test()

Loading cached training data of dataset 0 from /nfs/students/summer-term-2020/project-4/data/dataset1/dataset_48k/
Loading cached validation data of dataset 0 from /nfs/students/summer-term-2020/project-4/data/dataset1/dataset_48k/


In [None]:
# Train a CRNN for a fixed number of epochs, validating after every epoch.
torch.cuda.empty_cache()

hparams = {
    "batch_size": 24,
    "learning_rate": 0.002,
    "weight_decay": 0.01,
    "lr_decay": 0.95
}
# NOTE(review): this loop only consumes `batch_size` and `learning_rate`;
# `weight_decay` and `lr_decay` are handed to CRNNPLModule but are not applied
# to the optimizer built below — confirm whether that is intended.

model = CRNNPLModule(hparams).to(device)
model.float()

datasetHandler.load(model, "training")
datasetHandler.load(model, "validation")

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=hparams['learning_rate'])

num_epochs = 12

for epoch in range(num_epochs):  # loop over the dataset multiple times
    # validate() leaves the model in eval mode, so switch back once per epoch.
    model.train()

    running_loss = 0.0
    # Count batches explicitly instead of relying on the loop index after the
    # loop: the index is unbound for an empty loader and may be stale from an
    # earlier cell run.
    num_batches = 0
    for data in model.get_dataloader("training", batch_size=hparams['batch_size']):
        data['audio'], data['label'] = data['audio'].to(device), data['label'].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(data)

        loss = criterion(outputs, data['label'])
        loss.backward()
        optimizer.step()

        # track statistics
        running_loss += loss.item()
        num_batches += 1

    # Mean loss over the epoch's batches (0.000 if the loader produced none).
    print('[%d] loss: %.3f' % (epoch + 1, running_loss / max(num_batches, 1)))
    validate(model)

print('Finished Training')

print("Validate:")
validate(model)

  1%|▏         | 25/1687 [00:00<00:06, 241.50it/s]

[1] loss: 0.450


100%|██████████| 1687/1687 [00:07<00:00, 236.71it/s]


	Accuracy:	0.8488441019561351
	Precision:	0.8182831661092531
	Recall:   	0.8886198547215496
	F1-score:	0.8520023215322112


  2%|▏         | 27/1687 [00:00<00:06, 267.48it/s]

[2] loss: 0.400


100%|██████████| 1687/1687 [00:06<00:00, 263.17it/s]


	Accuracy:	0.8790752815649081
	Precision:	0.8702380952380953
	Recall:   	0.8849878934624698
	F1-score:	0.8775510204081632


  1%|▏         | 25/1687 [00:00<00:06, 241.91it/s]

[3] loss: 0.389


100%|██████████| 1687/1687 [00:07<00:00, 235.67it/s]


	Accuracy:	0.8737403675163011
	Precision:	0.8751529987760098
	Recall:   	0.8656174334140436
	F1-score:	0.8703590992087644


  2%|▏         | 27/1687 [00:00<00:06, 267.93it/s]

[4] loss: 0.361


100%|██████████| 1687/1687 [00:06<00:00, 262.90it/s]


	Accuracy:	0.8672199170124482
	Precision:	0.8888888888888888
	Recall:   	0.8329297820823245
	F1-score:	0.86


  2%|▏         | 26/1687 [00:00<00:06, 259.99it/s]

[5] loss: 0.358


100%|██████████| 1687/1687 [00:06<00:00, 263.38it/s]


	Accuracy:	0.8790752815649081
	Precision:	0.8624708624708625
	Recall:   	0.8958837772397095
	F1-score:	0.8788598574821853


  1%|▏         | 25/1687 [00:00<00:06, 242.19it/s]

[6] loss: 0.340


100%|██████████| 1687/1687 [00:07<00:00, 236.61it/s]


	Accuracy:	0.8749259039715471
	Precision:	0.8435754189944135
	Recall:   	0.914043583535109
	F1-score:	0.8773968622893666


  2%|▏         | 27/1687 [00:00<00:06, 267.74it/s]

[7] loss: 0.332


100%|██████████| 1687/1687 [00:06<00:00, 263.62it/s]


	Accuracy:	0.8844101956135151
	Precision:	0.868144690781797
	Recall:   	0.9007263922518159
	F1-score:	0.8841354723707665


  1%|▏         | 25/1687 [00:00<00:06, 241.81it/s]

[8] loss: 0.326


100%|██████████| 1687/1687 [00:07<00:00, 236.49it/s]


	Accuracy:	0.8814463544754001
	Precision:	0.8699763593380615
	Recall:   	0.8910411622276029
	F1-score:	0.8803827751196173


  2%|▏         | 27/1687 [00:00<00:06, 268.30it/s]

[9] loss: 0.321


100%|██████████| 1687/1687 [00:06<00:00, 263.00it/s]


	Accuracy:	0.8861885002963841
	Precision:	0.8635321100917431
	Recall:   	0.9116222760290557
	F1-score:	0.8869257950530035


  1%|▏         | 25/1687 [00:00<00:06, 241.91it/s]

[10] loss: 0.314


100%|██████████| 1687/1687 [00:07<00:00, 236.86it/s]


	Accuracy:	0.8879668049792531
	Precision:	0.8615209988649262
	Recall:   	0.9188861985472155
	F1-score:	0.8892794376098418


  2%|▏         | 27/1687 [00:00<00:06, 269.07it/s]

[11] loss: 0.308


100%|██████████| 1687/1687 [00:06<00:00, 263.49it/s]


	Accuracy:	0.8885595732068761
	Precision:	0.8977556109725686
	Recall:   	0.8716707021791767
	F1-score:	0.8845208845208846


  1%|▏         | 25/1687 [00:00<00:06, 242.26it/s]

[12] loss: 0.303


 77%|███████▋  | 1291/1687 [00:05<00:01, 263.08it/s]

In [36]:
# Score the current `model` on the validation split again.
validate(model)

100%|██████████| 1687/1687 [00:06<00:00, 258.59it/s]

	Accuracy:	0.890337877889745
	Precision:	0.8838323353293414
	Recall:   	0.8934624697336562
	F1-score:	0.8886213124623721





In [None]:
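# Uncomment to save the trained weights.
# NOTE(review): the file name mentions "SpectrogramBasedCNN" while `model` is a CRNNPLModule — confirm the target path first.
#model_state_dict_path = "/nfs/students/summer-term-2020/project-4/data/models/SpectrogramBasedCNN.pt"
#torch.save(model.state_dict(), model_state_dict_path)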

# Backprop: gradient of the loss with respect to the input audio

In [25]:
# Fresh, untrained CRNN used to inspect gradients with respect to the input.
hparams = {
    "batch_size": 24,
    "learning_rate": 0.002,
    "weight_decay": 0.01,
    "lr_decay": 1
}

# Use the notebook-wide `device` rather than a hard-coded .cuda(): this matches
# the other model constructions and keeps the cell runnable on CPU-only hosts.
model = CRNNPLModule(hparams).to(device)

In [26]:
# Load the training and validation splits for `model` through the shared handler.
for split_name in ("training", "validation"):
    datasetHandler.load(model, split_name)

In [27]:
# Take a single training batch of 32 samples and move its tensors to the compute device.
x = next(iter(model.get_dataloader("training", batch_size=32)))
for field in ('audio', 'label'):
    x[field] = x[field].to(device)

In [29]:
# Compute and print the gradient of the cross-entropy loss w.r.t. the input audio.
model.eval()
model.zero_grad()
# model.zero_grad() only clears parameter gradients. Reset the input's gradient
# as well, otherwise re-running this cell accumulates onto the previous result.
x['audio'].grad = None
x['audio'].requires_grad_()

outputs = model(x)

loss = nn.CrossEntropyLoss()(outputs, x['label'])
loss.backward()
print(x['audio'].grad)

torch.Size([32, 481489])
torch.Size([32, 9, 128])
torch.Size([9, 32, 128])
torch.Size([32, 64])
torch.Size([32, 2])
tensor([[-9.9304e-08, -1.9802e-07, -1.9548e-07,  ..., -4.8839e-11,
         -4.5564e-11, -2.2173e-11],
        [-1.1339e-10, -2.2373e-10, -2.1470e-10,  ..., -8.9343e-11,
         -7.7258e-11, -3.6642e-11],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-7.8451e-09, -1.5406e-08, -1.3339e-08,  ..., -2.0062e-13,
         -2.0022e-13, -1.0031e-13],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.7652e-08, -3.4504e-08, -3.2775e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]], device='cuda:0')


# Debugging dataset handler

In [3]:
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code —
# only load checkpoints from a trusted location.
# map_location restores the stored tensors onto the `device` selected by this
# notebook, so a checkpoint saved on a GPU also loads on a CPU-only machine.
x = torch.load("/nfs/students/summer-term-2020/project-4/data/models/crnn.pth", map_location=device)

In [None]:
# Rebinds `datasetHandler` to a new DatasetHandler instance for the debugging cells below.
datasetHandler = DatasetHandler()

In [None]:
from classification.models.M5 import M5PLModule

# Small hyper-parameter set for exercising the dataset handler with the M5 model.
hparams = dict(batch_size=2, learning_rate=0.001, weight_decay=0, lr_decay=1)

model = M5PLModule(hparams)

# NOTE(review): other cells call datasetHandler.load(model, split); confirm that
# load_datasets(model) is the intended entry point here.
datasetHandler.load_datasets(model)

In [None]:
# Walk through every training batch once; the batches themselves are discarded.
for x in model.get_dataloader("training", batch_size=32):
    continue

In [None]:
# Walk through every validation batch once; the batches themselves are discarded.
for x in model.get_dataloader("validation", batch_size=32):
    continue

In [None]:
# Walk through every testing batch once; the batches themselves are discarded.
for x in model.get_dataloader("testing", batch_size=32):
    continue