# Preamble

In [1]:
# json
import json

# Pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchaudio

import matplotlib.pyplot as plt
import librosa

import librosa.display
import IPython.display as ipd

import numpy as np
import pickle
import random

In [2]:
import torch
import librosa
from torch_specinv import griffin_lim
from torch_specinv.metrics import spectral_convergence as SC

from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt

In [3]:
%load_ext autoreload

In [4]:
from tqdm import tqdm
from sklearn import metrics

In [5]:
# Select the compute device: the GPU when it is both requested and present,
# otherwise fall back to the CPU.
use_cuda = True
print("CUDA Available: ", torch.cuda.is_available())
if use_cuda and torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

CUDA Available:  True


In [6]:
import os
import sys

# Put the project's source directory (three levels up, under `src`) on the
# import path exactly once, so re-running this cell does not add duplicates.
module_path = os.path.abspath('../../../src/')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

# Load data

In [7]:
# Project-local dataset helper; later cells call `datasetHandler.load(model, split)`
# with the split names "training" and "validation".
from datasets.datasethandler import DatasetHandler 
datasetHandler = DatasetHandler()

# Validating

In [33]:
def reportScore(y_true, y_pred):
    """Print accuracy, precision, recall and F1 for the given predictions.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels, aligned element-wise with ``y_true``.

    Returns:
        None. The four scores are written to stdout, one tab-indented line each.
    """
    # (label prefix, scorer) pairs, in the order they are printed.
    scorers = (
        ("\tAccuracy:\t", metrics.accuracy_score),
        ("\tPrecision:\t", metrics.precision_score),
        ("\tRecall:   \t", metrics.recall_score),
        ("\tF1-score:\t", metrics.f1_score),
    )
    for prefix, scorer in scorers:
        print(prefix + str(scorer(y_true, y_pred)))

In [34]:
def validate(model):
    """Run the model over its validation split and print classification scores.

    The model is switched to eval mode and left there; callers that continue
    training must call ``model.train()`` again themselves.

    Args:
        model: a module exposing ``get_dataloader("validation", shuffle=False)``
            whose batches are dicts with ``'audio'`` and ``'label'`` tensors, and
            which is callable on such a batch dict. Its output is indexed along
            dim 1 to pick the predicted class.

    Returns:
        None. Scores are printed by ``reportScore``.
    """
    valid_loader = model.get_dataloader("validation", shuffle=False)
    model.eval()

    y_true = []
    y_pred = []
    with torch.no_grad():
        for data in tqdm(valid_loader):
            data['audio'], data['label'] = data['audio'].to(device), data['label'].to(device)
            outputs = model(data)

            # Collect per-sample values with extend()/tolist() rather than
            # .item(), so batches holding more than one sample work too.
            y_pred.extend(outputs.argmax(dim=1).reshape(-1).tolist())
            y_true.extend(data['label'].reshape(-1).tolist())

    reportScore(y_true, y_pred)

# Training

In [8]:
def test():
    """Smoke test: build a CRNN, load its data and push one training batch through it.

    Returns:
        Whatever ``CRNNPLModule`` returns when called on a single training batch
        of 32 items whose ``'audio'`` entry has been moved to ``device``.
    """
    hparams = {"batch_size": 24, "learning_rate": 0.002, "weight_decay": 0.01, "lr_decay": 0.95}

    net = CRNNPLModule(hparams).to(device)
    net.float()

    for split in ("training", "validation"):
        datasetHandler.load(net, split)

    # Only the first batch is needed; note the loader batch size (32) is
    # independent of hparams["batch_size"].
    loader = net.get_dataloader("training", batch_size=32)
    batch = next(iter(loader))
    batch['audio'] = batch['audio'].to(device)

    return net(batch)

In [20]:
# test() looks up CRNNPLModule when it is called, so this import must run
# before the call below.
from classification.models.CRNN import CRNNPLModule
# Run the smoke test once; binding the result to `_` discards the return value.
_ = test()

In [35]:
torch.cuda.empty_cache()

# Number of full passes over the training split.
NUM_EPOCHS = 12

hparams = {
    "batch_size": 24,
    "learning_rate": 0.002,
    "weight_decay": 0.01,
    "lr_decay": 0.95
}

# NOTE(review): this cell only reads `batch_size` and `learning_rate` itself;
# `weight_decay` and `lr_decay` are handed to CRNNPLModule but are not passed
# to the Adam optimizer below -- confirm that is intended.
model = CRNNPLModule(hparams).to(device)
model.float()

datasetHandler.load(model, "training")
datasetHandler.load(model, "validation")

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=hparams['learning_rate'])

for epoch in range(NUM_EPOCHS):  # loop over the dataset multiple times
    train_loader = model.get_dataloader("training", batch_size=hparams['batch_size'])

    # validate() leaves the model in eval mode at the end of every epoch, so
    # switch back to training mode once per epoch rather than once per batch.
    model.train()

    running_loss = 0.0
    num_batches = 0
    for data in train_loader:
        data['audio'], data['label'] = data['audio'].to(device), data['label'].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(data)

        loss = criterion(outputs, data['label'])
        loss.backward()
        optimizer.step()

        # track statistics
        running_loss += loss.item()
        num_batches += 1

    # Mean loss per batch; report NaN instead of failing on an undefined loop
    # index when the loader yielded no batches.
    mean_loss = running_loss / num_batches if num_batches else float('nan')
    print('[%d] loss: %.3f' % (epoch + 1, mean_loss))
    validate(model)

print('Finished Training')

print("Validate:")
validate(model)

  2%|▏         | 26/1687 [00:00<00:06, 250.70it/s]

[1] loss: 0.449


100%|██████████| 1687/1687 [00:06<00:00, 261.78it/s]


	Accuracy:	0.8535862477771191
	Precision:	0.8641509433962264
	Recall:   	0.8317191283292978
	F1-score:	0.8476249228871067


  1%|▏         | 25/1687 [00:00<00:06, 240.71it/s]

[2] loss: 0.404


100%|██████████| 1687/1687 [00:06<00:00, 257.47it/s]


	Accuracy:	0.8672199170124482
	Precision:	0.8706896551724138
	Recall:   	0.8559322033898306
	F1-score:	0.8632478632478633


  2%|▏         | 27/1687 [00:00<00:06, 261.14it/s]

[3] loss: 0.379


100%|██████████| 1687/1687 [00:07<00:00, 239.52it/s]


	Accuracy:	0.8684054534676942
	Precision:	0.8673965936739659
	Recall:   	0.8631961259079903
	F1-score:	0.8652912621359222


  2%|▏         | 27/1687 [00:00<00:06, 267.43it/s]

[4] loss: 0.366


100%|██████████| 1687/1687 [00:06<00:00, 262.76it/s]


	Accuracy:	0.8707765263781861
	Precision:	0.8781094527363185
	Recall:   	0.8547215496368039
	F1-score:	0.8662576687116564


  1%|▏         | 25/1687 [00:00<00:06, 240.71it/s]

[5] loss: 0.355


100%|██████████| 1687/1687 [00:07<00:00, 240.30it/s]


	Accuracy:	0.8737403675163011
	Precision:	0.8644470868014269
	Recall:   	0.8801452784503632
	F1-score:	0.8722255548890222


  2%|▏         | 27/1687 [00:00<00:06, 267.09it/s]

[6] loss: 0.342


100%|██████████| 1687/1687 [00:06<00:00, 257.49it/s]


	Accuracy:	0.8725548310610551
	Precision:	0.849942726231386
	Recall:   	0.8983050847457628
	F1-score:	0.8734549735138317


  2%|▏         | 27/1687 [00:00<00:06, 268.18it/s]

[7] loss: 0.335


100%|██████████| 1687/1687 [00:06<00:00, 261.73it/s]


	Accuracy:	0.8796680497925311
	Precision:	0.8739495798319328
	Recall:   	0.8813559322033898
	F1-score:	0.8776371308016878


  1%|▏         | 25/1687 [00:00<00:06, 240.76it/s]

[8] loss: 0.331


100%|██████████| 1687/1687 [00:07<00:00, 236.61it/s]


	Accuracy:	0.8678126852400712
	Precision:	0.8309549945115258
	Recall:   	0.9164648910411622
	F1-score:	0.8716177317213587


  2%|▏         | 27/1687 [00:00<00:06, 261.50it/s]

[9] loss: 0.320


100%|██████████| 1687/1687 [00:06<00:00, 262.85it/s]


	Accuracy:	0.8796680497925311
	Precision:	0.8543799772468714
	Recall:   	0.9092009685230025
	F1-score:	0.8809384164222874


  1%|▏         | 25/1687 [00:00<00:06, 241.73it/s]

[10] loss: 0.322


100%|██████████| 1687/1687 [00:07<00:00, 236.86it/s]


	Accuracy:	0.8855957320687611
	Precision:	0.8845686512758202
	Recall:   	0.8813559322033898
	F1-score:	0.8829593693147363


  2%|▏         | 27/1687 [00:00<00:06, 266.67it/s]

[11] loss: 0.307


100%|██████████| 1687/1687 [00:06<00:00, 262.61it/s]


	Accuracy:	0.8921161825726142
	Precision:	0.867579908675799
	Recall:   	0.9200968523002422
	F1-score:	0.8930669800235018


  1%|▏         | 25/1687 [00:00<00:06, 241.62it/s]

[12] loss: 0.308


100%|██████████| 1687/1687 [00:06<00:00, 259.02it/s]
  2%|▏         | 27/1687 [00:00<00:06, 267.99it/s]

	Accuracy:	0.890337877889745
	Precision:	0.8838323353293414
	Recall:   	0.8934624697336562
	F1-score:	0.8886213124623721
Finished Training
Validate:


100%|██████████| 1687/1687 [00:07<00:00, 239.72it/s]

	Accuracy:	0.890337877889745
	Precision:	0.8838323353293414
	Recall:   	0.8934624697336562
	F1-score:	0.8886213124623721





In [36]:
# Re-run validation on the current `model`; prints the same four scores as
# the per-epoch validation in the training cell.
validate(model)

100%|██████████| 1687/1687 [00:06<00:00, 258.59it/s]

	Accuracy:	0.890337877889745
	Precision:	0.8838323353293414
	Recall:   	0.8934624697336562
	F1-score:	0.8886213124623721





In [None]:
#model_state_dict_path = "/nfs/students/summer-term-2020/project-4/data/models/SpectrogramBasedCNN.pt"
#torch.save(model.state_dict(), model_state_dict_path)

# Backprop

In [25]:
# Hyperparameters for the gradient-inspection model; identical to the training
# cell except that lr_decay is 1 instead of 0.95.
hparams = {
    "batch_size": 24,
    "learning_rate": 0.002,
    "weight_decay": 0.01,
    "lr_decay": 1
}

# Place the model on the notebook-wide `device` (as every other cell does)
# instead of calling .cuda() unconditionally, so the cell also runs when the
# device selection fell back to the CPU.
model = CRNNPLModule(hparams).to(device)

In [26]:
# Load the training and validation splits for the model, in that order.
for split in ("training", "validation"):
    datasetHandler.load(model, split)

In [27]:
# Take the first training batch (32 items) and move both of its tensors to
# the selected device.
x = next(iter(model.get_dataloader("training", batch_size=32)))
x['audio'] = x['audio'].to(device)
x['label'] = x['label'].to(device)

In [29]:
# Compute the gradient of the classification loss with respect to the input
# audio of the batch `x`.
model.eval()
model.zero_grad()

# Track gradients on the input tensor itself, and drop any gradient left over
# from an earlier run of this cell: backward() accumulates into .grad, and
# model.zero_grad() only resets the model's parameters, not this input.
x['audio'].requires_grad_()
x['audio'].grad = None

outputs = model(x)

loss = nn.CrossEntropyLoss()(outputs, x['label'])
loss.backward()
print(x['audio'].grad)

torch.Size([32, 481489])
torch.Size([32, 9, 128])
torch.Size([9, 32, 128])
torch.Size([32, 64])
torch.Size([32, 2])
tensor([[-9.9304e-08, -1.9802e-07, -1.9548e-07,  ..., -4.8839e-11,
         -4.5564e-11, -2.2173e-11],
        [-1.1339e-10, -2.2373e-10, -2.1470e-10,  ..., -8.9343e-11,
         -7.7258e-11, -3.6642e-11],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-7.8451e-09, -1.5406e-08, -1.3339e-08,  ..., -2.0062e-13,
         -2.0022e-13, -1.0031e-13],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.7652e-08, -3.4504e-08, -3.2775e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]], device='cuda:0')


# Debugging dataset handler

In [3]:
# Load a saved object from disk (presumably a CRNN checkpoint, judging by the
# file name -- TODO confirm what the file contains).
# NOTE(review): torch.load unpickles the file by default; only load files from
# a trusted source.
# NOTE(review): absolute path to a shared mount; this cell fails wherever that
# path does not exist.
x = torch.load("/nfs/students/summer-term-2020/project-4/data/models/crnn.pth")

In [None]:
# Create a fresh DatasetHandler for the debugging cells that follow.
# Requires the DatasetHandler import from the "Load data" cell to have run.
datasetHandler = DatasetHandler()

In [None]:
from classification.models.M5 import M5PLModule

# Hyperparameters passed to the M5 module's constructor.
hparams = {
    "batch_size": 2,
    "learning_rate": 0.001,
    "weight_decay": 0,
    "lr_decay": 1
}

model = M5PLModule(hparams)

# NOTE(review): the other cells call datasetHandler.load(model, split) once per
# split; this one calls load_datasets(model) -- presumably it loads all splits
# in one go; confirm against DatasetHandler.
datasetHandler.load_datasets(model)

In [None]:
# Walk through every batch of the training loader without using it; `x` is
# left bound to the last batch.
# Presumably a check that the whole split can be loaded -- TODO confirm.
for x in model.get_dataloader("training", batch_size=32):
    pass

In [None]:
# Walk through every batch of the validation loader without using it; `x` is
# left bound to the last batch.
# Presumably a check that the whole split can be loaded -- TODO confirm.
for x in model.get_dataloader("validation", batch_size=32):
    pass

In [None]:
# Walk through every batch of the testing loader without using it; `x` is
# left bound to the last batch.
# Presumably a check that the whole split can be loaded -- TODO confirm.
for x in model.get_dataloader("testing", batch_size=32):
    pass