In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

import pandas as pd
import numpy as np

In [14]:
df = pd.read_csv("Data/features_30_sec.csv")

In [15]:
df_modified = df.drop(df[df.filename == "jazz.00054.wav"].index)

In [16]:
import pickle

infile = open("mfcc_list",'rb')
mfcc_list = pickle.load(infile)
infile.close()

In [17]:
labels = df_modified['label']

In [18]:
mfcc_sizes = []

for i in mfcc_list:
    mfcc_sizes.append(i.shape[1])

In [19]:
for i in range(len(mfcc_list)):
    if mfcc_list[i].shape[1] >= min(mfcc_sizes):
        mfcc_list[i] = np.resize(mfcc_list[i], (20, min(mfcc_sizes)))

In [35]:
import numpy as np
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    mfcc_list, labels, test_size=0.2, random_state=42
)

In [36]:
from sklearn.preprocessing import LabelBinarizer

le = LabelBinarizer()
y_train = le.fit_transform(y_train.values)
y_test = le.transform(y_test.values)

In [22]:
def force_cudnn_initialization():
    s = 32
    dev = torch.device('cuda')
    torch.nn.functional.conv2d(torch.zeros(s, s, s, s, device=dev), torch.zeros(s, s, s, s, device=dev))

In [23]:
force_cudnn_initialization()

In [62]:
class Linear(nn.Module):
    def __init__(self):
        super(Linear, self).__init__()
        self.hidden_size = 1290
        drp = 0.1
        n_classes = len(le.classes_)
        self.fc1 = nn.Linear(1290*20, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 256)
        self.fc4 = nn.Linear(256, 128)
        self.fc5 = nn.Linear(128, 64)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(drp)
        self.out = nn.Linear(64, n_classes)
        # self.softmax = nn.Softmax(n_classes)

    def forward(self, x):
        x = torch.flatten(x, start_dim=1)
        fc1 = self.dropout(self.relu(self.fc1(x)))
        fc2 = self.dropout(self.relu(self.fc2(fc1)))
        fc3 = self.dropout(self.relu(self.fc3(fc2)))
        fc4 = self.dropout(self.relu(self.fc4(fc3)))
        fc5 = self.dropout(self.relu(self.fc5(fc4)))
        out = self.out(fc5)
        return out


In [58]:
def multi_acc(y_pred, y_test):
    y_pred_softmax = F.softmax(y_pred, dim = 1)
    _, y_pred_tags = torch.max(y_pred_softmax, dim = 1)
    _, y_test_tags = torch.max(y_test, dim = 1) 
    
    correct_pred = (y_pred_tags == y_test_tags).float()
    acc = correct_pred.sum() / len(correct_pred)
    
    acc = torch.round(acc * 100)
    
    return acc

In [63]:
import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

n_epochs = 20
batch_size = 32
model = Linear()
loss_fn = nn.CrossEntropyLoss(reduction="sum")
optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()), lr=0.0001
)

model.cuda()

# Load train and test in CUDA Memory
x_train = torch.tensor(X_train, dtype=torch.float).cuda()
y_train = torch.tensor(y_train, dtype=torch.float).cuda()
x_cv = torch.tensor(X_test, dtype=torch.float).cuda()
y_cv = torch.tensor(y_test, dtype=torch.float).cuda()

# Create Torch datasets
train = torch.utils.data.TensorDataset(x_train, y_train)
valid = torch.utils.data.TensorDataset(x_cv, y_cv)

# Create Data Loaders
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=False)

train_loss = []
valid_loss = []

for epoch in range(n_epochs):
    start_time = time.time()
    # Set model to train configuration
    model.train()
    avg_loss = 0.0
    accuracy = []
    for i, (x_batch, y_batch) in enumerate(train_loader):
        # Predict/Forward Pass
        y_pred = model(x_batch)
        # # Casting
        # x_batch = x_batch.to(device)
        # y_batch = y_batch.type(torch.LongTensor)
        # y_batch = y_batch.to(device)
        # Compute loss
        # print(y_pred)
        loss = loss_fn(y_pred, y_batch)
        acc = multi_acc(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        avg_loss += loss.item() / len(train_loader)
        accuracy.append(acc.item())

    # Set model to validation configuration -Doesn't get trained here
    model.eval()
    avg_val_loss = 0.0
    val_accuracy = []
    val_preds = np.zeros((len(x_cv), len(le.classes_)))

    for i, (x_batch, y_batch) in enumerate(valid_loader):
        # Casting
        # x_batch = x_batch.to(device)
        # y_batch = y_batch.type(torch.LongTensor)
        # y_batch = y_batch.to(device)
        # Detach
        y_pred = model(x_batch).detach()
        val_acc = multi_acc(y_pred, y_batch)
        avg_val_loss += loss_fn(y_pred, y_batch).item() / len(valid_loader)
        val_accuracy.append(val_acc.item())

    # Check Accuracy
    # val_accuracy = sum(val_preds.argmax(axis=1) == y_test) / len(y_test)
    train_loss.append(avg_loss)
    valid_loss.append(avg_val_loss)
    elapsed_time = time.time() - start_time
    print(
        "Epoch {}/{} \t loss={:.4f} \t acc={:.2f}% \t val_loss={:.4f} \t val_acc={:.2f}% \t time={:.2f}s".format(
            epoch + 1, n_epochs, avg_loss, np.mean(accuracy), avg_val_loss, np.mean(val_accuracy), elapsed_time
        )
    )


  y_train = torch.tensor(y_train, dtype=torch.float).cuda()


Epoch 1/20 	 loss=70.5245 	 acc=22.80% 	 val_loss=56.9808 	 val_acc=23.71% 	 time=0.19s
Epoch 2/20 	 loss=60.7244 	 acc=31.08% 	 val_loss=50.7228 	 val_acc=30.43% 	 time=0.19s
Epoch 3/20 	 loss=55.3707 	 acc=36.52% 	 val_loss=48.7639 	 val_acc=34.29% 	 time=0.18s
Epoch 4/20 	 loss=52.0229 	 acc=39.28% 	 val_loss=50.0579 	 val_acc=34.86% 	 time=0.18s
Epoch 5/20 	 loss=49.3969 	 acc=44.20% 	 val_loss=48.7090 	 val_acc=40.29% 	 time=0.18s
Epoch 6/20 	 loss=44.6894 	 acc=50.88% 	 val_loss=44.0610 	 val_acc=47.00% 	 time=0.18s
Epoch 7/20 	 loss=42.9247 	 acc=52.36% 	 val_loss=41.5812 	 val_acc=45.57% 	 time=0.18s
Epoch 8/20 	 loss=38.9651 	 acc=58.56% 	 val_loss=45.0380 	 val_acc=45.14% 	 time=0.18s
Epoch 9/20 	 loss=34.1048 	 acc=62.64% 	 val_loss=44.0487 	 val_acc=50.43% 	 time=0.18s
Epoch 10/20 	 loss=32.8102 	 acc=61.68% 	 val_loss=42.9076 	 val_acc=47.86% 	 time=0.19s
Epoch 11/20 	 loss=29.7969 	 acc=67.28% 	 val_loss=45.1633 	 val_acc=50.29% 	 time=0.19s
Epoch 12/20 	 loss=26.0154 	 a