# Deep Learning Project

## Build a Music Genre Classification System

In this project, I will design and implement a deep learning model that learns to classify music by genre. I will train the model on data from the GTZAN dataset.

PyTorch will be used to build the model.

### Implementation
I started by importing the modules required for this project and loading in the data.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import torch
import seaborn as sns
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataset import random_split
import os

%matplotlib inline

In [None]:
df = pd.read_csv("Data/features_30_sec.csv")

# Encode the genre names once and derive both the integer ids and the
# id -> name lookup from the same Categorical. Categorical codes follow the
# sorted category order, whereas ``unique()`` follows order of appearance, so
# building ``classes`` from ``unique()`` is only correct if the CSV happens to
# be sorted by label. Using ``.categories`` makes ``classes[class_id]`` the
# matching genre name regardless of row order.
genre_categories = pd.Categorical(df["label"])
df["class_id"] = genre_categories.codes
classes = list(genre_categories.categories)
df.head()

In [None]:
# Root folder of the raw audio, organised as <genre>/<filename>.
file_path = "Data/genres_original"

# Audio geometry used when slicing tracks into segments.
sample_rate = 22050  # samples per second passed to librosa.load
duration = 30  # seconds; each audio clip is 30 seconds long
samples_per_track = duration * sample_rate

# Track that is skipped during feature extraction.
# NOTE(review): presumably this file is unreadable/corrupt - confirm.
error_file = "jazz.00054.wav"

def extract_mfcc_features(file_path, num_segments=10, n_mfcc=20, n_fft=2048, hop_length=512):
    """
    Extract one mean-MFCC vector per audio segment and save them to disk.

    Every track listed in the module-level ``df`` (except ``error_file``) is
    loaded at ``sample_rate``, cut into ``num_segments`` consecutive slices of
    ``samples_per_track / num_segments`` samples, and each slice is reduced to
    the time-average of its MFCCs. The resulting feature and label tensors are
    written to ``data/features.pt`` and ``data/label.pt``.

    :param file_path: root directory containing one sub-folder per genre
    :param num_segments: number of segments sample tracks will be divided into
    :param n_mfcc: number of coefficients to extract
    :param n_fft: FFT window length, in samples
    :param hop_length: number of samples between successive FFT frames
    :return: None; the tensors are saved as a side effect
    """
    samples_per_segment = int(samples_per_track / num_segments)

    mfcc_rows = []
    label_rows = []

    for folder, filename, class_id in zip(df["label"], df["filename"], df["class_id"]):
        if filename == error_file:
            continue

        # Honour the caller-supplied root instead of a hard-coded directory.
        path = f"{file_path}/{folder}/{filename}"

        # Decode each track once; re-loading it for every segment multiplies
        # the (dominant) I/O and decoding cost by num_segments.
        signal_audio, sr = librosa.load(path, sr=sample_rate)

        for num in range(num_segments):
            start_sample = samples_per_segment * num
            finish_sample = start_sample + samples_per_segment
            # The signal must be passed as ``y=``: newer librosa releases
            # accept it as a keyword argument only.
            mfcc = librosa.feature.mfcc(y=signal_audio[start_sample:finish_sample],
                                        sr=sr, n_mfcc=n_mfcc,
                                        n_fft=n_fft, hop_length=hop_length)
            # Average over time frames -> one vector of n_mfcc values.
            mfcc_rows.append(np.mean(mfcc.T, axis=0).tolist())
            # Plain int so the label tensor dtype does not depend on the
            # pandas/NumPy integer type of the column.
            label_rows.append(int(class_id))

    features = torch.tensor(mfcc_rows)
    labels = torch.tensor(label_rows)

    # torch.save does not create missing parent directories.
    os.makedirs("data", exist_ok=True)
    torch.save(features, "data/features.pt")
    torch.save(labels, "data/label.pt")

# Run the feature extraction over the raw-audio root; as a side effect this
# writes data/features.pt and data/label.pt.
if __name__ == "__main__":
    extract_mfcc_features(file_path)

Here we load our data and create the dataloaders needed for our training, testing, and validation sets. 

In [None]:
# Reload the saved feature and label tensors from disk.
features = torch.load("data/features.pt")
labels = torch.load("data/label.pt")

def get_dataset(feature_tensor=None, label_tensor=None):
    """
    Wrap feature and label tensors in a ``TensorDataset``.

    :param feature_tensor: tensor with one row of features per sample;
        defaults to the module-level ``features`` tensor
    :param label_tensor: tensor with one class id per sample; defaults to the
        module-level ``labels`` tensor
    :return: ``TensorDataset`` yielding ``(feature_row, label)`` pairs, with
        the labels cast to int64 as required by ``nn.NLLLoss``
    """
    if feature_tensor is None:
        feature_tensor = features
    if label_tensor is None:
        label_tensor = labels

    # The tensors are already stacked along dim 0, so there is no need to
    # split them into rows and re-stack them in a Python loop.
    return TensorDataset(feature_tensor, label_tensor.long())

dataset = get_dataset()

# Roughly 70 % train / 15 % validation / 15 % test.
split_size = int(0.3 * len(dataset))
train_size = len(dataset) - split_size

val_size = int(0.5 * split_size)
test_size = split_size - val_size

# Seed the split so that re-running the notebook yields the same partition.
# With an unseeded split, a checkpoint saved in an earlier session could be
# evaluated on rows it was trained on.
# NOTE(review): rows are per-segment, so segments of the same track can still
# end up in different splits - consider splitting by track if that matters.
split_generator = torch.Generator().manual_seed(42)
train_ds, remaining_ds = random_split(dataset, [train_size, split_size],
                                      generator=split_generator)
val_ds, test_ds = random_split(remaining_ds, [val_size, test_size],
                               generator=split_generator)

print(len(train_ds))
print(len(val_ds))
print(len(test_ds))

# Only the training data benefits from shuffling; evaluation metrics do not
# depend on batch order.
trainloader = DataLoader(train_ds, batch_size=32, shuffle=True)

validloader = DataLoader(val_ds, batch_size=32, shuffle=False)

testloader = DataLoader(test_ds, batch_size=32, shuffle=False)

In [None]:
class Classifier(nn.Module):
    """
    Fully connected classifier mapping a feature vector to class log-probabilities.

    Four hidden layers (2048, 1024, 512, 256 units) with ReLU and dropout are
    followed by a linear output layer and ``log_softmax``, so the output is
    meant to be paired with ``nn.NLLLoss``.

    :param n_features: length of each input feature vector
    :param n_classes: number of output classes
    :param dropout: dropout probability applied after every hidden layer
    """

    def __init__(self, n_features=20, n_classes=10, dropout=0.3):
        super().__init__()
        # Attribute names are kept stable so saved state_dicts keep loading.
        self.fc1 = nn.Linear(n_features, 2048)
        self.fc2 = nn.Linear(2048, 1024)
        self.fc3 = nn.Linear(1024, 512)
        self.fc4 = nn.Linear(512, 256)
        self.fc5 = nn.Linear(256, n_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """
        :param x: tensor whose last dimension has ``n_features`` entries
        :return: log-probabilities over the classes along the last dimension
        """
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = self.dropout(F.relu(hidden(x)))

        # dim=-1 is the class dimension for batched and unbatched input alike.
        return F.log_softmax(self.fc5(x), dim=-1)

In [None]:
model = Classifier()
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=4e-4)

epochs = 100
steps = 0
# np.Inf was removed in NumPy 2.0; np.inf works on every version.
val_loss_min = np.inf

# Per-epoch mean losses. NOTE: ``test_losses`` holds the *validation* losses;
# the name is kept as-is for any code that reads it.
train_losses, test_losses = [], []
for e in range(epochs):
    # ---- training pass ----
    running_loss = 0.0
    model.train()
    # Distinct loop names so the module-level ``features``/``labels`` dataset
    # tensors are not overwritten by the last mini-batch.
    for batch_features, batch_labels in trainloader:
        optimizer.zero_grad()

        log_ps = model(batch_features)
        loss = criterion(log_ps, batch_labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # ---- validation pass ----
    val_loss = 0.0
    n_correct = 0
    n_seen = 0

    model.eval()
    # Turn off gradients for validation, saves memory and computations
    with torch.no_grad():
        for batch_features, batch_labels in validloader:
            log_ps = model(batch_features)
            # .item() keeps the running total (and everything derived from
            # it) a plain float instead of a tensor.
            val_loss += criterion(log_ps, batch_labels).item()

            # Count hits per sample so a short final batch is not
            # over-weighted, as it would be when averaging batch means.
            n_correct += (log_ps.argmax(dim=1) == batch_labels).sum().item()
            n_seen += batch_labels.size(0)

    running_loss = running_loss / len(trainloader)
    val_loss = val_loss / len(validloader)
    accuracy = n_correct / n_seen

    train_losses.append(running_loss)
    test_losses.append(val_loss)

    print(f"\nEpoch: {e+1}/{epochs}",
          f"Training Loss: {running_loss}",
          f"Validation Loss: {val_loss}",
          f"Validation Accuracy: {accuracy}")

    # Checkpoint whenever the validation loss matches or beats the best so far.
    if val_loss <= val_loss_min:
        print(f"Validation loss decreased ({val_loss_min} --> {val_loss}). Saving model")
        torch.save(model.state_dict(), 'model.pt')
        val_loss_min = val_loss

Finally, the test accuracy for each genre and the overall test accuracy are displayed.

In [None]:
# Evaluate the checkpoint with the lowest validation loss on the test split.
model.load_state_dict(torch.load('model.pt'))

num_classes = len(classes)
test_loss = 0.0
class_correct = [0.0] * num_classes
class_total = [0.0] * num_classes

model.eval()
# No gradients are needed for evaluation.
with torch.no_grad():
    # Distinct loop names so the module-level ``features``/``labels`` dataset
    # tensors are not overwritten by the last mini-batch.
    for batch_features, batch_labels in testloader:
        output = model(batch_features)
        loss = criterion(output, batch_labels)
        # criterion returns the batch mean; weight it by the batch size so
        # that dividing by the dataset size below gives a per-sample mean.
        test_loss += loss.item() * batch_features.size(0)

        pred = output.argmax(dim=1)
        correct = pred.eq(batch_labels)

        # calculate test accuracy for each object class
        # (iterating the 1-D tensors directly also works for a final batch of
        # size 1, where squeezing to a 0-d array would break indexing)
        for label, is_correct in zip(batch_labels.tolist(), correct.tolist()):
            class_correct[label] += float(is_correct)
            class_total[label] += 1

test_loss = test_loss / len(testloader.dataset)
print(f"Test Loss: {test_loss}\n")

for i in range(num_classes):
    if class_total[i] > 0:
        print(f"Test Accuracy of {classes[i]}: {round((100 * class_correct[i]) / class_total[i], 2)} ({int(class_correct[i])}/{int(class_total[i])})")
    else:
        # These counters are built from the test split, not the training set.
        print(f"Test Accuracy of {classes[i]:>5}: N/A (no test examples)")

print(f"\nTest Accuracy (Overall): {(100. * np.sum(class_correct) / np.sum(class_total))} ({int(np.sum(class_correct))}/{int(np.sum(class_total))})")