In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("Data/features_30_sec.csv")

In [3]:
df_modified = df.drop(df[df.filename == "jazz.00054.wav"].index)

In [4]:
import pickle

infile = open("mfcc_list",'rb')
mfcc_list = pickle.load(infile)
infile.close()

In [5]:
labels = df_modified['label']

In [6]:
mfcc_sizes = []

for i in mfcc_list:
    mfcc_sizes.append(i.shape[1])

In [7]:
for i in range(len(mfcc_list)):
    if mfcc_list[i].shape[1] >= min(mfcc_sizes):
        mfcc_list[i] = np.resize(mfcc_list[i], (32, min(mfcc_sizes)))

In [12]:
import numpy as np
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    mfcc_list, labels, test_size=0.2, random_state=42
)


In [13]:
from sklearn.preprocessing import LabelBinarizer

le = LabelBinarizer()
y_train = le.fit_transform(y_train.values)
y_test = le.transform(y_test.values)

In [14]:
def force_cudnn_initialization():
    s = 32
    dev = torch.device('cuda')
    torch.nn.functional.conv2d(torch.zeros(s, s, s, s, device=dev), torch.zeros(s, s, s, s, device=dev))

In [27]:
def multi_acc(y_pred, y_test):
    y_pred_softmax = F.softmax(y_pred, dim = 1)
    _, y_pred_tags = torch.max(y_pred_softmax, dim = 1)
    _, y_test_tags = torch.max(y_test, dim = 1) 
    
    correct_pred = (y_pred_tags == y_test_tags).float()
    acc = correct_pred.sum() / len(correct_pred)
    
    acc = torch.round(acc * 100)
    
    return acc

In [15]:
force_cudnn_initialization()

In [22]:
class GRUNet(nn.Module):
    def __init__(self, input_dim, hidden_size, batch_size):
        super(GRUNet, self).__init__()
        self.hidden_size = hidden_size
        self.batch = batch_size
        self.input_dim = input_dim
        drp = 0.1
        n_classes = len(le.classes_)
        self.gru = nn.GRU(self.input_dim, self.hidden_size)
        self.fc1 = nn.Linear(self.hidden_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(drp)
        self.out = nn.Linear(64, n_classes)
        # self.softmax = nn.Softmax(n_classes)
    
    def initialize_hidden_state(self, device):
        return torch.zeros((1, self.batch, self.hidden_size)).to(device)

    def forward(self, x):
        self.hidden = self.initialize_hidden_state(device)
        gru, self.hidden = self.gru(x, self.hidden)
        out = self.dropout(gru[-1, :, :])
        fc1 = self.dropout(self.relu(self.fc1(out)))
        fc2 = self.dropout(self.relu(self.fc2(fc1)))
        out = self.out(fc2)
        return out, self.hidden


In [23]:
from torchvision import models
print(model)

GRUNet(
  (gru): GRU(20, 256)
  (fc1): Linear(in_features=256, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.1, inplace=False)
  (out): Linear(in_features=64, out_features=10, bias=True)
)


In [32]:
import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

n_epochs = 20
batch_size = 32
model = GRUNet(1290, 256, batch_size)
loss_fn = nn.CrossEntropyLoss(reduction="sum")
optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()), lr=0.0001
)

model.cuda()

# Load train and test in CUDA Memory
x_train = torch.tensor(X_train, dtype=torch.float).cuda()
y_train = torch.tensor(y_train, dtype=torch.float).cuda()
x_cv = torch.tensor(X_test, dtype=torch.float).cuda()
y_cv = torch.tensor(y_test, dtype=torch.float).cuda()

# Create Torch datasets
train = torch.utils.data.TensorDataset(x_train, y_train)
valid = torch.utils.data.TensorDataset(x_cv, y_cv)

# Create Data Loaders
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True, drop_last=True)
valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=False, drop_last = True)

train_loss = []
valid_loss = []

for epoch in range(n_epochs):
    start_time = time.time()
    # Set model to train configuration
    model.train()
    avg_loss = 0.0
    accuracy = []
    for i, (x_batch, y_batch) in enumerate(train_loader):
        # Predict/Forward Pass
        y_pred, hidden = model(x_batch)
        # # Casting
        # x_batch = x_batch.to(device)
        # y_batch = y_batch.type(torch.LongTensor)
        # y_batch = y_batch.to(device)
        # Compute loss
        # print(y_pred)
        #print(x_batch.shape, y_batch.shape)
        loss = loss_fn(y_pred, y_batch)
        acc = multi_acc(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        avg_loss += loss.item() / len(train_loader)
        accuracy.append(acc.item())

    # Set model to validation configuration -Doesn't get trained here
    model.eval()
    avg_val_loss = 0.0
    val_accuracy = []
    val_preds = np.zeros((len(x_cv), len(le.classes_)))

    for i, (x_batch, y_batch) in enumerate(valid_loader):
        # Casting
        # x_batch = x_batch.to(device)
        # y_batch = y_batch.type(torch.LongTensor)
        # y_batch = y_batch.to(device)
        # Detach
        y_pred, hidden2 = model(x_batch)
        y_pred = y_pred.detach()
        val_acc = multi_acc(y_pred, y_batch)
        avg_val_loss += loss_fn(y_pred, y_batch).item() / len(valid_loader)
        val_accuracy.append(val_acc.item())

    # Check Accuracy
    # val_accuracy = sum(val_preds.argmax(axis=1) == y_test) / len(y_test)
    train_loss.append(avg_loss)
    valid_loss.append(avg_val_loss)
    elapsed_time = time.time() - start_time
    print(
        "Epoch {}/{} \t loss={:.4f} \t acc={:.2f}% \t val_loss={:.4f} \t val_acc={:.2f}% \t time={:.2f}s".format(
            epoch + 1, n_epochs, avg_loss, np.mean(accuracy), avg_val_loss, np.mean(val_accuracy), elapsed_time
        )
    )

  y_train = torch.tensor(y_train, dtype=torch.float).cuda()


Epoch 1/20 	 loss=73.8278 	 acc=9.17% 	 val_loss=73.7470 	 val_acc=9.33% 	 time=0.16s
Epoch 2/20 	 loss=73.7728 	 acc=9.58% 	 val_loss=73.6836 	 val_acc=9.83% 	 time=0.16s
Epoch 3/20 	 loss=73.8509 	 acc=9.54% 	 val_loss=73.7807 	 val_acc=8.33% 	 time=0.16s
Epoch 4/20 	 loss=73.7471 	 acc=12.54% 	 val_loss=73.8128 	 val_acc=8.17% 	 time=0.15s
Epoch 5/20 	 loss=73.9276 	 acc=10.08% 	 val_loss=73.8559 	 val_acc=9.33% 	 time=0.16s
Epoch 6/20 	 loss=73.7820 	 acc=9.00% 	 val_loss=73.8917 	 val_acc=10.33% 	 time=0.16s
Epoch 7/20 	 loss=73.8182 	 acc=10.33% 	 val_loss=73.9496 	 val_acc=8.67% 	 time=0.16s
Epoch 8/20 	 loss=73.7310 	 acc=11.25% 	 val_loss=73.8610 	 val_acc=7.67% 	 time=0.15s
Epoch 9/20 	 loss=73.7561 	 acc=8.88% 	 val_loss=73.8978 	 val_acc=6.00% 	 time=0.16s
Epoch 10/20 	 loss=73.7733 	 acc=9.12% 	 val_loss=73.8634 	 val_acc=8.00% 	 time=0.16s
Epoch 11/20 	 loss=73.6813 	 acc=11.17% 	 val_loss=73.8527 	 val_acc=7.83% 	 time=0.16s
Epoch 12/20 	 loss=73.7320 	 acc=10.00% 	 val_