In [128]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

import pandas as pd
import numpy as np

In [129]:
df = pd.read_csv("Data/features_30_sec.csv")

In [130]:
df_modified = df.drop(df[df.filename == "jazz.00054.wav"].index)

In [131]:
import pickle

infile = open("mfcc_list",'rb')
mfcc_list = pickle.load(infile)
infile.close()

In [132]:
labels = df_modified['label']

In [133]:
mfcc_sizes = []

for i in mfcc_list:
    mfcc_sizes.append(i.shape[1])

In [134]:
for i in range(len(mfcc_list)):
    if mfcc_list[i].shape[1] >= min(mfcc_sizes):
        mfcc_list[i] = np.resize(mfcc_list[i], (20, min(mfcc_sizes)))

In [135]:
import numpy as np
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(
    mfcc_list, labels, test_size=0.2, random_state=42
)

X_val, X_test, y_val, y_test = train_test_split(
    X_val, y_val, test_size=0.5, random_state=42
)

In [136]:
from sklearn.preprocessing import LabelBinarizer

le = LabelBinarizer()
y_train = le.fit_transform(y_train.values)
y_test = le.transform(y_test.values)

In [137]:
def force_cudnn_initialization():
    s = 20
    dev = torch.device('cuda')
    torch.nn.functional.conv2d(torch.zeros(s, s, s, s, device=dev), torch.zeros(s, s, s, s, device=dev))

In [138]:
force_cudnn_initialization()

In [139]:
class GRUNet(nn.Module):
    def __init__(self, input_dim, hidden_size, batch_size):
        super(GRUNet, self).__init__()
        self.hidden_size = hidden_size
        self.batch = batch_size
        self.input_dim = input_dim
        drp = 0.1
        n_classes = len(le.classes_)
        self.gru = nn.GRU(self.input_dim, self.hidden_size)
        self.fc1 = nn.Linear(self.hidden_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(drp)
        self.out = nn.Linear(64, n_classes)
        # self.softmax = nn.Softmax(n_classes)
    
    def initialize_hidden_state(self, device):
        return torch.zeros((1, self.batch, self.hidden_size)).to(device)

    def forward(self, x, lens, device):
        self.hidden = self.initialize_hidden_state(device)
        gru, self.hidden = self.gru(x, self.hidden)
        out = self.dropout(gru[-1, :, :])
        fc1 = self.dropout(self.relu(self.fc1(out)))
        fc2 = self.dropout(self.relu(self.fc2(fc1)))
        out = self.out(fc2)
        return out, self.hidden


In [140]:
def multi_acc(y_pred, y_test):
    y_pred_softmax = F.softmax(y_pred, dim = 1)
    _, y_pred_tags = torch.max(y_pred_softmax, dim = 1)
    _, y_test_tags = torch.max(y_test, dim = 1) 
    
    correct_pred = (y_pred_tags == y_test_tags).float()
    acc = correct_pred.sum() / len(correct_pred)
    
    acc = torch.round(acc * 100)
    
    return acc

In [141]:
# Use Dataset class to represent the dataset object
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
class MyData(Dataset):
    def __init__(self, X, y):
        self.data = X
        self.target = y
        self.length = [np.sum(1 - np.equal(x,0)) for x in X]
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.target[index]
        x_len = self.length[index]
        
        return x, y, x_len
    
    def __len__(self):
        return len(self.data)

In [142]:
train_dataset = MyData(X_train, y_train)
val_dataset = MyData(X_val, y_val)
test_dataset = MyData(X_test, y_test)

import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

n_epochs = 20
batch_size = 20
units = 256
inp_size =20
model = GRUNet(inp_size, units, batch_size)
loss_fn = nn.CrossEntropyLoss(reduction="sum")
optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()), lr=0.0001
)

TRAIN_BUFFER_SIZE = len(X_train)
VAL_BUFFER_SIZE = len(X_val)
TEST_BUFFER_SIZE = len(X_test)

TRAIN_N_BATCH = TRAIN_BUFFER_SIZE // batch_size
VAL_N_BATCH = VAL_BUFFER_SIZE // batch_size
TEST_N_BATCH = TEST_BUFFER_SIZE // batch_size

#create 
train_loader = DataLoader(train_dataset, batch_size = batch_size,
                          drop_last=True, shuffle=True)
valid_loader = DataLoader(val_dataset, batch_size = batch_size,
                          drop_last=True, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size = batch_size,
                          drop_last=True, shuffle=True)

use_cuda = True if torch.cuda.is_available() else False
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

#loss criterion and optimizer
criterion = nn.CrossEntropyLoss(reduction="sum")
optimizer = torch.optim.Adam(model.parameters())

def loss_function(y, prediction):
    target = torch.max(y, 1)[1] 
    loss = criterion(prediction, target)
    
    return loss

def accuracy(target, logit):
    target = torch.max(target, 1)[1]
    corrects = (torch.max(logit, 1)[1].data == target).sum()
    accuracy = 100. * corrects / len(logit)
    
    return accuracy

train_loss = []
valid_loss = []

EPOCHS = 10

for epoch in range(EPOCHS):
    start_time = time.time()
    taccuracy = []
    vaccuracy = []
    start = time.time()
    total_loss = 0
    train_accuracy, val_accuracy = 0, 0
    
    for (batch, (inp, targ, lens)) in enumerate(train_loader):
        loss = 0
        predictions, _ = model(inp.permute(2,1,0).to(device), lens, device)
        
        loss += loss_function(targ.to(device), predictions)
        batch_loss = (loss / int(targ.shape[1]))
        total_loss += batch_loss
        taccuracy.append(loss)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        batch_accuracy = accuracy(targ.to(device), predictions)
        train_accuracy += batch_accuracy
        
        if batch % 100 == 0:
            print('Epoch {} Batch {} Val Loss {:.4f}'.format(epoch + 1,
                                                              batch, 
                                                              batch_loss.cpu().detach().numpy()))
            
    for (batch, (inp, targ, lens)) in enumerate(valid_loader):
        
        predictions, _ = model(inp.permute(2,1, 0).to(device), lens, device)
        batch_accuracy = accuracy(targ.to(device), predictions)
        val_accuracy += batch_accuracy
        
    print('Epoch {} Loss {:.4f} -- Train Acc. {:.4f} -- Val Acc. {:.4f}'.format(epoch + 1,
                                                                                total_loss / TRAIN_N_BATCH,
                                                                                train_accuracy / TRAIN_N_BATCH,
                                                                                val_accuracy / TEST_N_BATCH))
        
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))
    
    
    

Epoch 1 Batch 0 Val Loss 4.7132


KeyError: 0