<a href="https://colab.research.google.com/github/omier/music-genre-classifier/blob/master/DL_Final_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!git clone https://github.com/omier/music-genre-classifier.git

Cloning into 'music-genre-classifier'...
remote: Enumerating objects: 2005, done.[K
remote: Total 2005 (delta 0), reused 0 (delta 0), pack-reused 2005[K
Receiving objects: 100% (2005/2005), 1.19 GiB | 39.69 MiB/s, done.
Resolving deltas: 100% (1/1), done.
Checking out files: 100% (2004/2004), done.


In [None]:
!pip3 install pytorch_lightning efficientnet_pytorch

In [4]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
import math
from pytorch_lightning import metrics

In [60]:
import plotly.express as px
import pandas as pd
import numpy as np

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [6]:
img_data = 'music-genre-classifier/Data/images_original/'
dataset = torchvision.datasets.ImageFolder(
    root=img_data,
    transform=torchvision.transforms.ToTensor(),
)

In [7]:
NUM_CLASSES = len(dataset.classes)

In [8]:
# 60% train, 20% validate, 20% test
trainset_size=math.ceil(len(dataset)*0.6)
valset_size=math.ceil(len(dataset)*0.2)
testset_size=len(dataset) - trainset_size - valset_size

trainset, valset, testset = torch.utils.data.random_split(dataset, [trainset_size, valset_size, testset_size])

In [9]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=16,
                                          shuffle=True)

valloader = torch.utils.data.DataLoader(valset, batch_size=16,
                                         shuffle=False)

testloader = torch.utils.data.DataLoader(testset, batch_size=16,
                                         shuffle=False)

In [47]:
data_loaders = {'train': trainloader, 'validation': valloader, 'test': testloader}

In [10]:
trainset[0][0].shape

torch.Size([3, 288, 432])

In [55]:
class CNNGTZAN(nn.Module):

    def __init__(self):
        super(CNNGTZAN, self).__init__()

        self.conv1 = nn.Conv2d(3, 16, 3)
        self.conv2 = nn.Conv2d(16, 32, 3)
        self.conv3 = nn.Conv2d(32, 64, 3)

        # 288, 432 ->(3X3) 286, 430 ->(max pool 2X2) 143, 215 
        # 143, 215 ->(3X3) 141, 213 ->(max pool 2X2) 70, 106 
        # 70, 106  ->(3X3) 68, 104  ->(max pool 2X2) 34, 52
        self.fc1 = nn.Linear(64 * 34 * 52, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        # convolution layer 1 (convolution -> relu -> max pool 2X2)
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        # convolution layer 2 (convolution -> relu -> max pool 2X2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # convolution layer 3 (convolution -> relu -> max pool 2X2)
        x = F.max_pool2d(F.relu(self.conv3(x)), 2)

        # flatten x to (batch_size, 64 * 34 * 52) matrix - per instance flatten
        x = torch.flatten(x, start_dim=1)

        # fully connected linear layers with relu activation function
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))

        # last fc linear layer
        x = self.fc3(x)

        return x

In [56]:
gtzan = CNNGTZAN().to(device=device)

In [52]:
def train(model, n_epochs):
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr=0.0001)
  
  history = []
  for e in range(1, n_epochs + 1):
    for counter, data in enumerate(trainloader):
      inputs, labels = data

      predicted_labels = model(inputs.to(device=device))
      optimizer.zero_grad()
      loss = criterion(predicted_labels, labels.to(device=device))
      loss.backward()
      optimizer.step()

    print(f'Epoch {e}')
    history.append(evaluate(model, criterion))

  return history

In [53]:
def evaluate(model, criterion, sets=['train', 'validation']):
  with torch.no_grad():
    sets_metrics = dict()

    for set_name, dataloader in data_loaders.items():
      if set_name in sets:
        recall = metrics.Recall(num_classes=NUM_CLASSES, average='macro').to(device=device)
        precision = metrics.Precision(num_classes=NUM_CLASSES, average='macro').to(device=device)
        accuracy = metrics.Accuracy().to(device=device)
        loss = 0

        for inputs, labels in dataloader:
          predicted_labels = model(inputs.to(device=device))
          labels = labels.to(device=device)
          loss += criterion(predicted_labels, labels.to(device=device)).item()

          recall.update(predicted_labels, labels)
          precision.update(predicted_labels, labels)
          accuracy.update(predicted_labels, labels)

        sets_metrics[set_name] = { 'recall': recall.compute().item(),
                                  'precision': precision.compute().item(),
                                  'accuracy': accuracy.compute().item(),
                                  'loss': loss / len(dataloader.dataset)}

    return sets_metrics

In [68]:
def plot(history):
  metrics_map = dict()

  for e_sets in history:
    for set_name, set_metrics in e_sets.items():
      for metric_name, metric_value in set_metrics.items():

        if metric_name not in metrics_map:
          metrics_map[metric_name] = dict()
        if set_name not in metrics_map[metric_name]:
          metrics_map[metric_name][set_name] = []

        metrics_map[metric_name][set_name].append(metric_value)
      
  for metric_name, sets in metrics_map.items():
    df = None
    for set_name, set_metrics in sets.items():
      size = len(set_metrics)
      if df is None:
        df = pd.DataFrame({"epoch": np.linspace(1, size, size),
                      metric_name: set_metrics,
                      "set": [set_name] * size})
      else:
        df = df.append(pd.DataFrame({"epoch": np.linspace(1, size, size),
                      metric_name: set_metrics,
                      "set": [set_name] * size}), ignore_index=True)

    fig = px.line(df, x="epoch", y=metric_name, line_group="set", title=f"epoch {metric_name} per dataset", color="set", hover_name="set")
    fig.show()

In [58]:
history = train(gtzan, 10)

Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10


In [69]:
plot(history)

**Transfer Learning**

In [70]:
from efficientnet_pytorch import EfficientNet

In [71]:
model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=NUM_CLASSES, advprop=True)

Loaded pretrained weights for efficientnet-b0


In [None]:
# for param in model.parameters():
#   param.require_grad = False

In [None]:
model

In [72]:
class MyEfficientNet(nn.Module):

  def __init__(self, efficientNetModel):
    super(MyEfficientNet, self).__init__()

    self.efficientNetModel = efficientNetModel
    # output size: torch.Size([batch_size, 1280, 9, 14])
    self.avg_pool = nn.AvgPool2d(2)
    # 9, 14 => 4, 7
    self.lin = nn.Linear(1280*4*7, NUM_CLASSES)
    

  def forward(self, x):
    x = self.efficientNetModel.extract_features(x)

    x = self.avg_pool(x)
    x = torch.flatten(x, start_dim=1)

    x = self.lin(x)
    x = nn.Softmax()(x)
   
    return x


In [73]:
gtzan_EfficientNet = MyEfficientNet(model).to(device=device)

In [74]:
history_EfficientNet = train(gtzan_EfficientNet, 30)


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19
Epoch 20
Epoch 21
Epoch 22
Epoch 23
Epoch 24
Epoch 25
Epoch 26
Epoch 27
Epoch 28
Epoch 29
Epoch 30


In [75]:
plot(history_EfficientNet)