<a href="https://colab.research.google.com/github/omier/music-genre-classifier/blob/master/DL_Final_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title RUN Pre-Processing?
run_preprocessing = False #@param {type:"boolean"}


# Init Notebook

In [None]:
!git clone https://github.com/omier/music-genre-classifier.git

Cloning into 'music-genre-classifier'...
remote: Enumerating objects: 7, done.[K
remote: Counting objects: 100% (7/7), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 12890 (delta 1), reused 4 (delta 0), pack-reused 12883[K
Receiving objects: 100% (12890/12890), 1.70 GiB | 20.70 MiB/s, done.
Resolving deltas: 100% (18/18), done.
Checking out files: 100% (12995/12995), done.


In [None]:
!pip3 install pytorch_lightning efficientnet_pytorch

Collecting pytorch_lightning
[?25l  Downloading https://files.pythonhosted.org/packages/e7/d4/d2751586c7961f238a6077a6dc6e4a9214445da3219f463aa44b29fe4b42/pytorch_lightning-1.1.8-py3-none-any.whl (696kB)
[K     |▌                               | 10kB 21.8MB/s eta 0:00:01[K     |█                               | 20kB 18.2MB/s eta 0:00:01[K     |█▍                              | 30kB 14.8MB/s eta 0:00:01[K     |█▉                              | 40kB 13.5MB/s eta 0:00:01[K     |██▍                             | 51kB 11.9MB/s eta 0:00:01[K     |██▉                             | 61kB 12.0MB/s eta 0:00:01[K     |███▎                            | 71kB 11.7MB/s eta 0:00:01[K     |███▊                            | 81kB 12.7MB/s eta 0:00:01[K     |████▎                           | 92kB 11.5MB/s eta 0:00:01[K     |████▊                           | 102kB 11.0MB/s eta 0:00:01[K     |█████▏                          | 112kB 11.0MB/s eta 0:00:01[K     |█████▋                  

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
import math
from pytorch_lightning import metrics
import plotly.express as px
import pandas as pd
import numpy as np
import pprint

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

# Pre-Processing

In [None]:
import librosa
from librosa import display
import matplotlib.pyplot as plt
import glob
import os
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas

In [None]:
n_fft = 2048
hop_length = 512
n_mels = 288
song_length = 30
song_mini_batch_length = 3

def preprocess(filename, out):
  for offset in range(0, song_length, song_mini_batch_length):
    y, sr = librosa.load(filename, duration=song_mini_batch_length, sr=None, offset=offset)
    song, _ = librosa.effects.trim(y)
    
    S = librosa.feature.melspectrogram(song, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
    S_DB = librosa.power_to_db(S, ref=np.max)

    fig = plt.Figure()
    canvas = FigureCanvas(fig)
    ax = fig.add_subplot(111)
    ax.set_axis_off()
    librosa.display.specshow(S_DB, ax=ax, y_axis='log', x_axis='time')
    fig.savefig(f'{out}_{offset}.png', transparent=True)

In [None]:
data_path = 'music-genre-classifier/Data/'
output_directory = 'melspectograms/'
genres = glob.glob(f'{data_path}genres_original/*')

def ensure_dir(dir):
  if not os.path.exists(dir):
    os.mkdir(dir)

if run_preprocessing:
  ensure_dir(f'{data_path}{output_directory}')
  for g in genres:
    waves = glob.glob(f'{g}/*')
    genre = g.split('/')[-1]
    for w in waves:
      filename = '.'.join(w.split('/')[-1].split('.')[:-1])
      ensure_dir(f'{data_path}{output_directory}{genre}')
      preprocess(w, f'{data_path}{output_directory}{genre}/{filename}')

# Load Data

In [None]:
img_data = 'music-genre-classifier/Data/melspectograms/'
dataset = torchvision.datasets.ImageFolder(
    root=img_data,
    transform=torchvision.transforms.ToTensor(),
)

In [None]:
len(dataset)

9990

In [None]:
NUM_CLASSES = len(dataset.classes)

In [None]:
# 60% train, 20% validate, 20% test
trainset_size=math.ceil(len(dataset)*0.6)
valset_size=math.ceil(len(dataset)*0.2)
testset_size=len(dataset) - trainset_size - valset_size

trainset, valset, testset = torch.utils.data.random_split(dataset, [trainset_size, valset_size, testset_size])

In [None]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=16,
                                          shuffle=True)

valloader = torch.utils.data.DataLoader(valset, batch_size=16,
                                         shuffle=False)

testloader = torch.utils.data.DataLoader(testset, batch_size=16,
                                         shuffle=False)

In [None]:
data_loaders = {'train': trainloader, 'validation': valloader, 'test': testloader}

# Helpers

In [None]:
def train(model, n_epochs, criterion, trainloader):
  optimizer = optim.Adam(model.parameters(), lr=0.0001)
  
  history = []
  for e in range(1, n_epochs + 1):
    for counter, data in enumerate(trainloader):
      inputs, labels = data

      predicted_labels = model(inputs.to(device=device))
      optimizer.zero_grad()
      loss = criterion(predicted_labels, labels.to(device=device))
      loss.backward()
      optimizer.step()
    
    current_metrics = evaluate(model, criterion)
    print(f'Epoch {e}\\{n_epochs} Metrics')
    pprint.pprint(current_metrics, indent=4)

    history.append(current_metrics)

  return history

In [None]:
def evaluate(model, criterion, sets=['train', 'validation']):
  with torch.no_grad():
    sets_metrics = dict()

    for set_name, dataloader in data_loaders.items():
      if set_name in sets:
        recall = metrics.Recall(num_classes=NUM_CLASSES, average='macro').to(device=device)
        precision = metrics.Precision(num_classes=NUM_CLASSES, average='macro').to(device=device)
        accuracy = metrics.Accuracy().to(device=device)
        loss = 0

        for inputs, labels in dataloader:
          predicted_labels = model(inputs.to(device=device))
          labels = labels.to(device=device)
          loss += criterion(predicted_labels, labels.to(device=device)).item()

          recall.update(predicted_labels, labels)
          precision.update(predicted_labels, labels)
          accuracy.update(predicted_labels, labels)

        sets_metrics[set_name] = { 'recall': recall.compute().item(),
                                  'precision': precision.compute().item(),
                                  'accuracy': accuracy.compute().item(),
                                  'loss': loss / len(dataloader.dataset)}

    return sets_metrics

In [None]:
def plot(history):
  metrics_map = dict()

  for e_sets in history:
    for set_name, set_metrics in e_sets.items():
      for metric_name, metric_value in set_metrics.items():

        if metric_name not in metrics_map:
          metrics_map[metric_name] = dict()
        if set_name not in metrics_map[metric_name]:
          metrics_map[metric_name][set_name] = []

        metrics_map[metric_name][set_name].append(metric_value)
      
  for metric_name, sets in metrics_map.items():
    df = None
    for set_name, set_metrics in sets.items():
      size = len(set_metrics)
      if df is None:
        df = pd.DataFrame({"epoch": np.linspace(1, size, size),
                      metric_name: set_metrics,
                      "set": [set_name] * size})
      else:
        df = df.append(pd.DataFrame({"epoch": np.linspace(1, size, size),
                      metric_name: set_metrics,
                      "set": [set_name] * size}), ignore_index=True)

    fig = px.line(df, x="epoch", y=metric_name, line_group="set", title=f"epoch {metric_name} per dataset", color="set", hover_name="set")
    fig.show()

# CNN 3 Conv 3 Linear
convolution layer 1 (convolution -> relu -> max pool 2X2)

convolution layer 2 (convolution -> relu -> max pool 2X2)

convolution layer 3 (convolution -> relu -> max pool 2X2)

3 fully connected linear layers with relu activation function

In [None]:
class CNNGTZAN(nn.Module):

    def __init__(self):
        super(CNNGTZAN, self).__init__()

        self.conv1 = nn.Conv2d(3, 16, 3)
        self.conv2 = nn.Conv2d(16, 32, 3)
        self.conv3 = nn.Conv2d(32, 64, 3)

        # 288, 432 ->(3X3) 286, 430 ->(max pool 2X2) 143, 215 
        # 143, 215 ->(3X3) 141, 213 ->(max pool 2X2) 70, 106 
        # 70, 106  ->(3X3) 68, 104  ->(max pool 2X2) 34, 52
        self.fc1 = nn.Linear(64 * 34 * 52, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        # convolution layer 1 (convolution -> relu -> max pool 2X2)
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        # convolution layer 2 (convolution -> relu -> max pool 2X2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # convolution layer 3 (convolution -> relu -> max pool 2X2)
        x = F.max_pool2d(F.relu(self.conv3(x)), 2)

        # flatten x to (batch_size, 64 * 34 * 52) matrix - per instance flatten
        x = torch.flatten(x, start_dim=1)

        # fully connected linear layers with relu activation function
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))

        # last fc linear layer
        x = self.fc3(x)

        return x

In [None]:
baseline_cnn = CNNGTZAN().to(device=device)

In [None]:
%%time
baseline_cnn_history = train(baseline_cnn, 15, nn.CrossEntropyLoss(), trainloader)

Epoch 1\15 Metrics
{   'train': {   'accuracy': 0.4235902428627014,
                 'loss': 0.09872361773007864,
                 'precision': 0.47811245918273926,
                 'recall': 0.42261871695518494},
    'validation': {   'accuracy': 0.41041040420532227,
                      'loss': 0.10058219762177796,
                      'precision': 0.4740615785121918,
                      'recall': 0.4130702614784241}}
Epoch 2\15 Metrics
{   'train': {   'accuracy': 0.5388721823692322,
                 'loss': 0.08294621066208636,
                 'precision': 0.5929979085922241,
                 'recall': 0.5403229594230652},
    'validation': {   'accuracy': 0.5125125050544739,
                      'loss': 0.08583130961185223,
                      'precision': 0.5664952993392944,
                      'recall': 0.5072112679481506}}
Epoch 3\15 Metrics
{   'train': {   'accuracy': 0.612278938293457,
                 'loss': 0.07233592368659872,
                 'precision': 0.61

In [None]:
plot(baseline_cnn_history)

# 4L-2D CNN



In [None]:
class BigCNN(nn.Module):
    def __init__(self):
        super(BigCNN, self).__init__()

        # 4 layers of convolution and max pooling
        self._extractor = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=4),

            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=8),
        )
        
       
        # some linear layers for classification
        self._classifier = nn.Sequential(nn.Dropout(0.2),
                                         nn.Linear(in_features=3*2*256, out_features=512),
                                         nn.ReLU(),
                                         nn.Linear(in_features=512, out_features=256),
                                         nn.ReLU(),
                                         nn.Linear(in_features=256, out_features=NUM_CLASSES))

    def forward(self, x):
        # torch.Size([16, 3, 288, 432])

        x = self._extractor(x)
        # BATCH_SIZE, CHANNELS, FREQUENCY, TIME
        # torch.Size([16, 256, 2, 3])
       
        x = x.view(x.size(0), -1)
        # BATCH_SIZE, 256 * 2 * 3
        # torch.Size([16, 1536])

        score = self._classifier(x)
        # torch.Size([16, 10])
        return score


In [None]:
big_cnn = BigCNN().to(device=device)

In [None]:
%%time
big_cnn_history = train(big_cnn, 15, nn.CrossEntropyLoss(), trainloader)

Epoch 1\15 Metrics
{   'train': {   'accuracy': 0.5925925970077515,
                 'loss': 0.07315428420269851,
                 'precision': 0.6206859946250916,
                 'recall': 0.5940846800804138},
    'validation': {   'accuracy': 0.5655655860900879,
                      'loss': 0.07567180124847023,
                      'precision': 0.5964350700378418,
                      'recall': 0.5667811632156372}}
Epoch 2\15 Metrics
{   'train': {   'accuracy': 0.6891891956329346,
                 'loss': 0.056789615694626115,
                 'precision': 0.7169680595397949,
                 'recall': 0.6886166334152222},
    'validation': {   'accuracy': 0.6581581830978394,
                      'loss': 0.06107522186275956,
                      'precision': 0.6829843521118164,
                      'recall': 0.6580643653869629}}
Epoch 3\15 Metrics
{   'train': {   'accuracy': 0.7380713820457458,
                 'loss': 0.04715207513467765,
                 'precision': 0.759

In [None]:
plot(big_cnn_history)

# 4L-2D CNN + GRU (fresh cnn with GRU)

In [None]:
class CNNGRU(nn.Module):
    def __init__(self):
        super(CNNGRU, self).__init__()

        # 4 layers of convolution and max pooling
        self._extractor = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=4),

            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=8),
        )
        
        # bidirectional GRU model with 3 hidden layers
        self._rnnModule = nn.GRU(512, 512, bidirectional=True, num_layers=3)
                                       

        # some linear layers for classification
        self._classifier = nn.Sequential(nn.Dropout(0.2),
                                         nn.Linear(in_features=3*2*512, out_features=512),
                                         nn.ReLU(),
                                         nn.Linear(in_features=512, out_features=256),
                                         nn.ReLU(),
                                         nn.Linear(in_features=256, out_features=NUM_CLASSES))

    def forward(self, x):
        # torch.Size([16, 3, 288, 432])

        x = self._extractor(x)
        # BATCH_SIZE, CHANNELS, FREQUENCY, TIME
        # torch.Size([16, 256, 2, 3])

        x = x.permute(0, 3, 1, 2)
        # BATCH_SIZE, TIME, CHANNELS, FREQUENCY
        # torch.Size([16, 3, 256, 2])

        x = x.view(x.size(0), x.size(1), -1)
        # BATCH_SIZE, TIME, CHANNELS*FREQUENCY
        # torch.Size([16, 3, 512])
      
        x, hn = self._rnnModule(x)
        # BATCH_SIZE, TIME, 512 * 2
        # torch.Size([16, 3, 1024])
       
        x = x.view(x.size(0), -1)
        # BATCH_SIZE, 512 * 2 * 3
        # torch.Size([16, 3072])

        score = self._classifier(x)
        # torch.Size([16, 10])
        return score


In [None]:
big_cnn_gru = CNNGRU().to(device=device)

In [None]:
%%time
big_cnn_gru_history = train(big_cnn_gru, 15, nn.CrossEntropyLoss(), trainloader)

Epoch 1\15 Metrics
{   'train': {   'accuracy': 0.372205525636673,
                 'loss': 0.10479964123473869,
                 'precision': 0.31947651505470276,
                 'recall': 0.3687557280063629},
    'validation': {   'accuracy': 0.3598598539829254,
                      'loss': 0.10609423577248513,
                      'precision': 0.3059137761592865,
                      'recall': 0.36696144938468933}}
Epoch 2\15 Metrics
{   'train': {   'accuracy': 0.5490490198135376,
                 'loss': 0.07530082044778047,
                 'precision': 0.5383242964744568,
                 'recall': 0.5475449562072754},
    'validation': {   'accuracy': 0.5195195078849792,
                      'loss': 0.07782897004136094,
                      'precision': 0.5123907923698425,
                      'recall': 0.5205775499343872}}
Epoch 3\15 Metrics
{   'train': {   'accuracy': 0.6558225154876709,
                 'loss': 0.05975577449516173,
                 'precision': 0.698

In [None]:
plot(big_cnn_gru_history)

# EfficientNet-b0 Transfer Learning
EfficientNet layers

Adaptive Average Pool 2d

Optional GRU layers defined by a parameter

Linear layer

Softmax

In [None]:
from efficientnet_pytorch import EfficientNet

In [None]:
model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=NUM_CLASSES, advprop=True)

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b0-b64d5a18.pth" to /root/.cache/torch/hub/checkpoints/adv-efficientnet-b0-b64d5a18.pth


HBox(children=(FloatProgress(value=0.0, max=21389172.0), HTML(value='')))


Loaded pretrained weights for efficientnet-b0


In [None]:
class MyEfficientNet(nn.Module):

  def __init__(self, efficientNetModel, use_GRU=False):
    super(MyEfficientNet, self).__init__()

    self.efficientNetModel = efficientNetModel
    # output size: torch.Size([batch_size, 1280, 9, 14])
    self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1280, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU()
    )
   
    self.pool = nn.AdaptiveAvgPool2d(2)

    self._rnnModule = nn.GRU(512, 512, bidirectional=True, num_layers=3)

    self.use_GRU = use_GRU
    if self.use_GRU:
      lin_size = 512 * 2 * 2
    else:
      lin_size = 1280 * 2 * 2

    self.lin = nn.Linear(lin_size, NUM_CLASSES)
    

  def forward(self, x):
    x = self.efficientNetModel.extract_features(x)

    x = self.pool(x)
    
    if self.use_GRU:
      x = self.conv1(x)
      # 16, 256, 2, 2

      x = x.permute(0, 3, 1, 2)
      # BATCH_SIZE, TIME, CHANNELS, FREQUENCY
      # torch.Size([16, 2, 256, 2])

      x = x.view(x.size(0), x.size(1), -1)
      # BATCH_SIZE, TIME, CHANNELS*FREQUENCY
      # torch.Size([16, 2, 512])
        
      x, hn = self._rnnModule(x)
      # BATCH_SIZE, TIME, 512 * 2
      # torch.Size([16, 2, 1024])
        
    x = x.view(x.size(0), -1)
    # BATCH_SIZE, 512 * 2 * 2
    # torch.Size([16, 2048])

    x = self.lin(x)
    x = nn.Softmax()(x)
   
    return x


In [None]:
gtzan_EfficientNet = MyEfficientNet(model).to(device=device)

In [None]:
%%time
history_EfficientNet = train(gtzan_EfficientNet, 15, nn.CrossEntropyLoss(), trainloader)


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



Epoch 1\15 Metrics
{   'train': {   'accuracy': 0.7452452182769775,
                 'loss': 0.10784227461428256,
                 'precision': 0.7485470175743103,
                 'recall': 0.7455995678901672},
    'validation': {   'accuracy': 0.7282282114028931,
                      'loss': 0.10891202846924225,
                      'precision': 0.7311700582504272,
                      'recall': 0.7240079045295715}}
Epoch 2\15 Metrics
{   'train': {   'accuracy': 0.8038038015365601,
                 'loss': 0.10392626872411123,
                 'precision': 0.8058317303657532,
                 'recall': 0.8038555383682251},
    'validation': {   'accuracy': 0.7912912964820862,
                      'loss': 0.10506659859532232,
                      'precision': 0.7904024124145508,
                      'recall': 0.7880927324295044}}
Epoch 3\15 Metrics
{   'train': {   'accuracy': 0.8426759839057922,
                 'loss': 0.10164568990638027,
                 'precision': 0.8519

In [None]:
plot(history_EfficientNet)

In [None]:
gtzan_EfficientNet_with_GRU = MyEfficientNet(model, use_GRU=True).to(device=device)

In [None]:
%%time
history_EfficientNet_with_GRU = train(gtzan_EfficientNet_with_GRU, 15, nn.CrossEntropyLoss(), trainloader)


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



Epoch 1\15 Metrics
{   'train': {   'accuracy': 0.939105749130249,
                 'loss': 0.09539934212182179,
                 'precision': 0.9414576888084412,
                 'recall': 0.9393799901008606},
    'validation': {   'accuracy': 0.8673673868179321,
                      'loss': 0.09972984493673742,
                      'precision': 0.8685998916625977,
                      'recall': 0.8666250109672546}}
Epoch 2\15 Metrics
{   'train': {   'accuracy': 0.9397730827331543,
                 'loss': 0.09529727438827097,
                 'precision': 0.9405637979507446,
                 'recall': 0.9400680661201477},
    'validation': {   'accuracy': 0.868868887424469,
                      'loss': 0.09968636021599755,
                      'precision': 0.8704971671104431,
                      'recall': 0.8686513900756836}}
Epoch 3\15 Metrics
{   'train': {   'accuracy': 0.9504504799842834,
                 'loss': 0.09462831312312577,
                 'precision': 0.951738

In [None]:
plot(history_EfficientNet_with_GRU)

# VGG11 Transfer Learning

In [None]:
vgg11 = torchvision.models.vgg11(pretrained=True)
vgg11

Downloading: "https://download.pytorch.org/models/vgg11-bbd30ac9.pth" to /root/.cache/torch/hub/checkpoints/vgg11-bbd30ac9.pth


HBox(children=(FloatProgress(value=0.0, max=531456000.0), HTML(value='')))




VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
 

In [None]:
class MyVGG11(nn.Module):
    def __init__(self, model):
        super(MyVGG11, self).__init__()
        self.features = model.features
        self.avgpool = nn.AvgPool2d(2)
        self.classifier = nn.Sequential(
            nn.Linear(4*6*512, 8192),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(8192, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, NUM_CLASSES),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [None]:
myVGG11Model = MyVGG11(vgg11).to(device=device)

In [42]:
%%time
myvgg11history = train(myVGG11Model, 15, nn.CrossEntropyLoss(), trainloader)

Epoch 1\15 Metrics
{   'train': {   'accuracy': 0.7472472190856934,
                 'loss': 0.04732411259565904,
                 'precision': 0.7651553153991699,
                 'recall': 0.7486407160758972},
    'validation': {   'accuracy': 0.7267267107963562,
                      'loss': 0.04938063478923298,
                      'precision': 0.7447241544723511,
                      'recall': 0.7241265177726746}}
Epoch 2\15 Metrics
{   'train': {   'accuracy': 0.8366699814796448,
                 'loss': 0.03263299131606235,
                 'precision': 0.8514556884765625,
                 'recall': 0.8373978734016418},
    'validation': {   'accuracy': 0.7917917966842651,
                      'loss': 0.04034611234465638,
                      'precision': 0.8091114163398743,
                      'recall': 0.7888506054878235}}
Epoch 3\15 Metrics
{   'train': {   'accuracy': 0.9297630786895752,
                 'loss': 0.013806246597463408,
                 'precision': 0.934

In [43]:
plot(myvgg11history)

# Models Compare (test set)

In [44]:
models = {
    'baseline CNN': baseline_cnn,
    '4 layers CNN': big_cnn,
    '4 layers CNN with GRU': big_cnn_gru,
    'EfficientNet-b0': gtzan_EfficientNet,
    'EfficientNet-b0 with GRU': gtzan_EfficientNet_with_GRU,
    'VGG11': myVGG11Model,
}

for model_name, model in models.items():
  print(model_name)
  print(evaluate(model, nn.CrossEntropyLoss(), sets=['test']))

baseline CNN
{'test': {'recall': 0.6674073338508606, 'precision': 0.6767995357513428, 'accuracy': 0.6666666865348816, 'loss': 0.06085472447318477}}
4 layers CNN
{'test': {'recall': 0.8373075723648071, 'precision': 0.851702868938446, 'accuracy': 0.8358358144760132, 'loss': 0.03195561381155724}}
4 layers CNN with GRU
{'test': {'recall': 0.8603062033653259, 'precision': 0.8676202893257141, 'accuracy': 0.8593593835830688, 'loss': 0.03222034716171337}}
EfficientNet-b0



Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



{'test': {'recall': 0.8980993628501892, 'precision': 0.8980218768119812, 'accuracy': 0.8963963985443115, 'loss': 0.0982457484091605}}
EfficientNet-b0 with GRU
{'test': {'recall': 0.8829814791679382, 'precision': 0.8837795257568359, 'accuracy': 0.8808808922767639, 'loss': 0.09881283248867001}}
VGG11
{'test': {'recall': 0.9333591461181641, 'precision': 0.9346113204956055, 'accuracy': 0.9329329133033752, 'loss': 0.021612720457657875}}
