# Preamble

In [13]:
# json
import json

# Pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchaudio

import matplotlib.pyplot as plt
import librosa

import librosa.display
import IPython.display as ipd

import numpy as np
import pickle
import random

In [14]:
import torch
import librosa
from torch_specinv import griffin_lim
from torch_specinv.metrics import spectral_convergence as SC

from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt

In [15]:
from tqdm import tqdm
from sklearn import metrics

In [16]:
# Define what device we are using
print("CUDA Available: ",torch.cuda.is_available())
use_cuda=True
device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")

CUDA Available:  True


In [18]:
import os
import sys
module_path = os.path.abspath(os.path.join('../../../src/'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Load data

In [6]:
import torch
from datasets.audioset import Audioset
from torchvision import transforms
from torch.utils.data import DataLoader

audio_set_tr = Audioset(split_mode='training', fixed_padding=True)
audio_set_val = Audioset(split_mode='validation', fixed_padding=True)

# Validating

In [98]:
def reportScore(y_true, y_pred):
    print("\tAccuracy:\t" + str(metrics.accuracy_score(y_true,y_pred)))
    print("\tPrecision:\t" + str(metrics.precision_score(y_true,y_pred)))
    print("\tRecall:   \t" + str(metrics.recall_score(y_true,y_pred)))
    print("\tF1-score:\t" + str(metrics.f1_score(y_true,y_pred)))
    #tn, fp, fn, tp = metrics.confusion_matrix(y_true,y_pred).ravel()

In [135]:
def validate(model):
    valid_loader = DataLoader(audio_set_val, batch_size=1)
    
    model.eval()

    y_true = []
    y_pred = []
    with torch.no_grad():
        for data in tqdm(valid_loader):
            data = [item.cuda() for item in data] # move to gpu
            inputs, labels = data[:2], data[-1]
            outputs = model(inputs)

            y_pred.append(torch.max(outputs.data, 1)[1].item())
            y_true.append(labels.item())
            
    reportScore(y_true, y_pred)

# Model definition

In [133]:
class SpectrogramCNN(nn.Module):
    def __init__(self):
        super(SpectrogramCNN, self).__init__()
        
        self.windowsize = 2048
        self.window = torch.hann_window(self.windowsize).cuda()
        
        self.bn0 = nn.BatchNorm2d(1)
        self.conv1 = nn.Conv2d(1, 5, kernel_size=10,stride=2)
        self.bn1 = nn.BatchNorm2d(5)
        self.conv2 = nn.Conv2d(5, 5, kernel_size=10,stride=2)
        self.bn2 = nn.BatchNorm2d(5)
        self.conv3 = nn.Conv2d(5, 10, kernel_size=20,stride=3)
        self.bn3 = nn.BatchNorm2d(10)
        self.conv4 = nn.Conv2d(10, 15, kernel_size=20,stride=3)
        self.bn4 = nn.BatchNorm2d(15)
        self.fc1 = nn.Linear(5100, 50)
        self.fc2 = nn.Linear(50, 2)

    def forward(self, x):
        x, _ = x
        x = torch.stft(x, self.windowsize, window=self.window).pow(2).sum(3).sqrt()
        x = x.unsqueeze(1).float()
        x = self.bn0(x)
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.relu(self.bn4(self.conv4(x)))
        x = x.view(-1, 5100)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        return F.log_softmax(x,dim=1)

In [136]:
torch.cuda.empty_cache()

model = SpectrogramCNN().cuda()
#print(model)
model.float()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_loader = DataLoader(audio_set_tr, batch_size=32)

model.train()

for epoch in range(5):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        data = [item.cuda() for item in data] # move to gpu
        x, labels = data[:2], data[-1]
        
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(x)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        # track statistics
        running_loss += loss.item()
    #print(outputs[:10])
    
    print('[%d] loss: %.3f' % (epoch + 1, running_loss / (i+1)))
    validate(model)
    
print('Finished Training')

print("Validate:")
validate(model)

  1%|          | 19/1687 [00:00<00:08, 189.95it/s]

[1] loss: 0.456


100%|██████████| 1687/1687 [00:06<00:00, 261.88it/s]


	Accuracy:	0.8494368701837581
	Precision:	0.8341121495327103
	Recall:   	0.864406779661017
	F1-score:	0.8489892984542212


  2%|▏         | 29/1687 [00:00<00:05, 285.82it/s]

[2] loss: 0.405


100%|██████████| 1687/1687 [00:05<00:00, 288.66it/s]


	Accuracy:	0.8429164196799052
	Precision:	0.8058887677208287
	Recall:   	0.8946731234866828
	F1-score:	0.8479632816982214


  2%|▏         | 32/1687 [00:00<00:05, 314.71it/s]

[3] loss: 0.376


100%|██████████| 1687/1687 [00:06<00:00, 279.74it/s]


	Accuracy:	0.8506224066390041
	Precision:	0.8072805139186295
	Recall:   	0.9128329297820823
	F1-score:	0.8568181818181818


  2%|▏         | 27/1687 [00:00<00:06, 269.82it/s]

[4] loss: 0.351


100%|██████████| 1687/1687 [00:05<00:00, 285.65it/s]


	Accuracy:	0.8761114404267931
	Precision:	0.8642266824085005
	Recall:   	0.8861985472154964
	F1-score:	0.8750747160789002


  2%|▏         | 32/1687 [00:00<00:05, 312.78it/s]

[5] loss: 0.328


100%|██████████| 1687/1687 [00:06<00:00, 278.85it/s]
  2%|▏         | 27/1687 [00:00<00:06, 267.39it/s]

	Accuracy:	0.8790752815649081
	Precision:	0.8676122931442081
	Recall:   	0.8886198547215496
	F1-score:	0.8779904306220097
Finished Training
Validate:


100%|██████████| 1687/1687 [00:05<00:00, 285.77it/s]

	Accuracy:	0.8790752815649081
	Precision:	0.8676122931442081
	Recall:   	0.8886198547215496
	F1-score:	0.8779904306220097





In [139]:
validate(model)

100%|██████████| 1687/1687 [00:05<00:00, 303.93it/s]

	Accuracy:	0.8790752815649081
	Precision:	0.8676122931442081
	Recall:   	0.8886198547215496
	F1-score:	0.8779904306220097





In [140]:
model_state_dict_path = "/nfs/students/summer-term-2020/project-4/data/models/SpectrogramBasedCNN.pt"
torch.save(model.state_dict(), model_state_dict_path)