# Pytorch neural net for music genre classification

This notebook trains a PyTorch feed-forward neural network to perform multiclass classification of music genre. The training data contains the following:

1. The intensity of particular instruments, representing one of 8 music genres (stored as a list of float32 `np.array` objects)
2. An integer label [0-7] representing one of the following music genres ['DRUM & BASS':0, 'R&B':1, 'BLUES':2, 'VOCAL JAZZ':3, 'NATURE SOUNDS':4, 'BAROQUE':5, 'DISNEY':6, 'HARD ROCK':7]

Currently, all training and inference run locally on the machine hosting the notebook, so a GPU is required on the notebook host. The next iteration will leverage SageMaker for training.

In [None]:
!pip install -r ./requirements.txt

In [None]:
# Import sklearn datasets and train_test_split
from sklearn.datasets import make_circles
from sklearn.model_selection import train_test_split

# Import plotting libraries
import seaborn as sns
import matplotlib.pyplot as plt

# Import Pytorch
import torch
from torch import nn
from d2l import torch as d2l
import torchvision
from torch.utils import data
from torchvision import transforms
import torch.nn.functional as F

# Import helper libraries
import random
import numpy as np
import pandas as pd
import boto3
import pickle
import time

### Train & Test data

In [None]:
# S3 bucket that the pickled train/test datasets are downloaded from.
bucketname = 'BUCKET_NAME' # replace with your S3 bucket name

In [None]:
# Download the pickled train/test sets from S3 into the local data directory.
s3 = boto3.resource('s3')
s3.Bucket(bucketname).download_file('pytorch-multiclass/data/DL1_Train.pkl', '../data/DL1_Train.pkl')
s3.Bucket(bucketname).download_file('pytorch-multiclass/data/DL1_Test.pkl', '../data/DL1_Test.pkl')

# NOTE(security): unpickling can execute arbitrary code, so only load files
# from a bucket you trust. The context managers make sure the file handles
# are closed as soon as the data has been read.
with open("../data/DL1_Train.pkl", "rb") as train_file:
    final_train = pickle.load(train_file, encoding='latin1')
with open("../data/DL1_Test.pkl", "rb") as test_file:
    final_test = pickle.load(test_file, encoding='latin1')

# Genre name -> integer class label used as the training target.
td = {'DRUM & BASS':0, 'R&B':1, 'BLUES':2, 'VOCAL JAZZ':3, 'NATURE SOUNDS':4, 'BAROQUE':5, 'DISNEY':6, 'HARD ROCK':7}

# Keep only the records tagged with exactly one genre. Filtering once
# guarantees that X and y are built from the same records in the same order.
single_genre_records = [
    record for record in final_train.values() if len(record['text_genre']) == 1
]
X = np.array([record['PACH'] for record in single_genre_records])
y = np.array([td[record['text_genre'][0]] for record in single_genre_records])

# Hold out part of the labelled data for validation (fixed seed for a repeatable split).
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state = 8675309)

In [None]:
# Wrap each NumPy (features, labels) split in a TensorDataset.
final_train_dataset, final_val_dataset = (
    data.TensorDataset(torch.from_numpy(features), torch.from_numpy(labels))
    for features, labels in ((X_train, y_train), (X_val, y_val))
)

# Serve both datasets in shuffled mini-batches of 100 examples.
final_train_loader, final_val_loader = (
    data.DataLoader(dataset, batch_size=100, shuffle=True)
    for dataset in (final_train_dataset, final_val_dataset)
)

## Model accuracy functions

In [None]:
def evaluate_accuracy_gpu(net, data_iter, device=None):
    """Compute the classification accuracy of a model on a dataset.

    Args:
        net: The model to evaluate. If it is a ``torch.nn.Module`` it is
            switched to evaluation mode (and left in that mode).
        data_iter: Iterable yielding ``(X, y)`` batches, where ``X`` is a
            tensor or a list of tensors and ``y`` holds the target labels.
        device: Device the batches are moved to. When ``None`` and ``net`` is
            a ``torch.nn.Module``, the device of its first parameter is used.

    Returns:
        The fraction of examples whose predicted class equals the label.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no examples.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()  # Disable dropout and use BatchNorm running statistics
        if device is None:
            device = next(iter(net.parameters())).device
    num_correct = 0.0
    num_examples = 0
    # Evaluation never backpropagates, so skip building autograd graphs.
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(X, list):
                X = [x.to(device) for x in X]
            else:
                X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            # Multi-column outputs are per-class scores: take the best class.
            if y_hat.dim() > 1 and y_hat.shape[1] > 1:
                y_hat = y_hat.argmax(dim=1)
            num_correct += float((y_hat.type(y.dtype) == y).sum())
            num_examples += y.numel()
    return num_correct / num_examples

## Neural net


In [None]:
# Feed-forward classifier: flattened input -> one hidden layer -> 8 genre scores.
net = nn.Sequential(
    nn.Flatten(),                                       # collapse each example to a vector
    nn.Dropout(p=0.2),                                  # light dropout on the inputs
    nn.Linear(in_features=4096, out_features=1024),     # hidden layer
    nn.ReLU(),
    nn.BatchNorm1d(num_features=1024),                  # normalise hidden activations
    nn.Dropout(p=0.5),                                  # heavier dropout before the output
    nn.Linear(in_features=1024, out_features=8),        # one score per genre
)

## Training

In [None]:
def train_model(net, train_iter, test_iter, num_epochs = 20, device=d2l.try_gpu(), lrate=0.005):
    """Train ``net`` with Adam and cross-entropy, plotting validation accuracy.

    The weights of every ``nn.Linear`` layer are re-initialised with Kaiming
    normal initialisation before training, so calling this function again
    restarts training from fresh linear weights.

    Args:
        net: The model to train; it is moved to ``device`` in place.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches used to measure accuracy
            after every epoch.
        num_epochs: Number of passes over ``train_iter``. Must be at least 1,
            because the final summary reports values from the last epoch.
        device: Device to train on.
        lrate: Learning rate for the Adam optimizer.
    """
    def init_weights(m):
        # Exact type match: subclasses of nn.Linear keep their own initialisation.
        if type(m) is nn.Linear:
            nn.init.kaiming_normal_(m.weight, mode='fan_out')

    net.apply(init_weights)
    print('training on', device)
    net.to(device)
    optimizer = torch.optim.Adam(net.parameters(), lr=lrate)
    loss = nn.CrossEntropyLoss()
    animator = d2l.Animator(xlabel='epoch', xlim=[0, num_epochs],
                            legend=['val acc'])
    timer = d2l.Timer()
    for epoch in range(num_epochs):
        # Sum of per-example training loss, no. of examples (reset every epoch)
        metric = d2l.Accumulator(2)
        net.train()
        # Iterate over the loaders passed in, not over notebook globals.
        for X, y in train_iter:
            timer.start()
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            l = loss(net(X), y)
            l.backward()
            optimizer.step()
            # `l` is the batch mean, so weight it by the batch size to keep the
            # epoch average correct when the last batch is smaller.
            metric.add(l.item() * X.shape[0], X.shape[0])
            timer.stop()
        train_loss = metric[0] / metric[1]
        test_acc = evaluate_accuracy_gpu(net, test_iter)
        animator.add(epoch + 1, test_acc)
    print('train loss %.3f' % (train_loss))
    print('validation acc %.3f' % (test_acc))
    # metric[1] counts one epoch's examples; scale it to the whole run.
    print('%.1f examples/sec on %s' % (metric[1]*num_epochs/timer.sum(), device))

In [None]:
# Seed PyTorch's global random number generator before training so that
# repeated runs start from the same random state.
torch.manual_seed(8675309)

In [None]:
%%time
# Train for 70 epochs instead of the default 20, passing the training and validation loaders.
train_model(net, final_train_loader, final_val_loader, num_epochs = 70)

## Batch prediction

In [None]:
# Build the input tensor on the same device as the network's parameters,
# so the forward pass works wherever the model currently lives.
final_X = np.array(final_test["test_data"])
final_X = torch.from_numpy(final_X).to(next(net.parameters()).device)

# Inference only: make sure dropout/batch-norm are in evaluation mode and
# avoid building an autograd graph for the whole test set.
net.eval()
with torch.no_grad():
    preds = torch.argmax(net(final_X), dim=1).type(torch.long)

# Genre names ordered so that the list index equals the integer class label.
genre_list = [
    "DRUM & BASS",
    "R&B",
    "BLUES",
    "VOCAL JAZZ",
    "NATURE SOUNDS",
    "BAROQUE",
    "DISNEY",
    "HARD ROCK",
]
predictions = [genre_list[p] for p in preds.tolist()]

# Output table: one row per test item with its predicted genre name.
df_out = pd.DataFrame()
df_out["ID"] = final_test["test_labels"]
df_out["genre"] = predictions

In [None]:
# Preview the first rows of the prediction table.
df_out.head()

In [None]:
# Write the ID/genre predictions to a CSV file, leaving out the DataFrame index.
df_out.to_csv("./NN_music_genre.csv", index=False)