<a href="https://colab.research.google.com/github/omier/music-genre-classifier/blob/master/DL_Final_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!unzip /content/drive/MyDrive/GTZAN.zip

In [5]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
import math

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [6]:
img_data = 'Data/images_original/'
dataset = torchvision.datasets.ImageFolder(
    root=img_data,
    transform=torchvision.transforms.ToTensor(),
)

In [7]:
# 60% train, 20% validate, 20% test
trainset_size=math.ceil(len(dataset)*0.6)
valset_size=math.ceil(len(dataset)*0.2)
testset_size=len(dataset) - trainset_size - valset_size

trainset, valset, testset = torch.utils.data.random_split(dataset, [trainset_size, valset_size, testset_size])

In [8]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=16,
                                          shuffle=True)

valloader = torch.utils.data.DataLoader(valset, batch_size=16,
                                         shuffle=False)

testloader = torch.utils.data.DataLoader(testset, batch_size=16,
                                         shuffle=False)

In [9]:
trainset[0][0].shape

torch.Size([3, 288, 432])

In [12]:
class CNNGTZAN(nn.Module):

    def __init__(self):
        super(CNNGTZAN, self).__init__()

        self.conv1 = nn.Conv2d(3, 16, 3)
        self.conv2 = nn.Conv2d(16, 32, 3)
        self.conv3 = nn.Conv2d(32, 64, 3)

        # 288, 432 ->(3X3) 286, 430 ->(max pool 2X2) 143, 215 
        # 143, 215 ->(3X3) 141, 213 ->(max pool 2X2) 70, 106 
        # 70, 106  ->(3X3) 68, 104  ->(max pool 2X2) 34, 52
        self.fc1 = nn.Linear(64 * 34 * 52, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        # convolution layer 1 (convolution -> relu -> max pool 2X2)
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        # convolution layer 2 (convolution -> relu -> max pool 2X2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # convolution layer 3 (convolution -> relu -> max pool 2X2)
        x = F.max_pool2d(F.relu(self.conv3(x)), 2)

        # flatten x to (batch_size, 64 * 34 * 52) matrix - per instance flatten
        x = torch.flatten(x, start_dim=1)

        # fully connected linear layers with relu activation function
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))

        # last fc linear layer
        x = self.fc3(x)

        return x

In [13]:
gtzan = CNNGTZAN().to(device=device)

In [28]:
PRINT_EVERY = 5
def train_loop(model, n_epochs):
  # Loss function
  criterion = nn.CrossEntropyLoss()

  # Optimizer (ADAM is a fancy version of SGD)
  optimizer = optim.Adam(model.parameters(), lr=0.0001)
  
  for e in range(1, n_epochs + 1):
    running_loss = 0
    for counter, data in enumerate(trainloader):
      inputs, labels = data

      predicted_labels = model(inputs.to(device=device))
      optimizer.zero_grad()
      loss = criterion(predicted_labels, labels.to(device=device))
      loss.backward()
      optimizer.step()

      running_loss += loss.item()
      
      if counter % PRINT_EVERY == 0:
          print('Epoch %d, %d/%d, Current Loss = %.4f' % (e, counter, len(trainloader), running_loss / PRINT_EVERY))
          running_loss = 0


In [None]:
!pip3 install pytorch_lightning

In [36]:
from pytorch_lightning import metrics

def evaluate(model, caption):
  with torch.no_grad():
    for dataset_descriptor, dataloader in [('validation', valloader), ('test', testloader)]:
      recall = metrics.Recall(num_classes=len(dataset.classes), average='macro').to(device=device)
      precision = metrics.Precision(num_classes=len(dataset.classes), average='macro').to(device=device)

      for inputs, labels in dataloader:
        
        predicted_labels = model(inputs.to(device=device))
        labels = labels.to(device=device)

        recall.update(predicted_labels, labels)
        precision.update(predicted_labels, labels)

      print("%s: %s - recall: %.3f precision: %.3f" % (caption, dataset_descriptor, recall.compute().item(), precision.compute().item()))

In [38]:
train_loop(gtzan, 10)

Epoch 1, 0/38, Current Loss = 0.2541
Epoch 1, 5/38, Current Loss = 1.6002
Epoch 1, 10/38, Current Loss = 1.5540
Epoch 1, 15/38, Current Loss = 1.8736
Epoch 1, 20/38, Current Loss = 1.5337
Epoch 1, 25/38, Current Loss = 1.6095
Epoch 1, 30/38, Current Loss = 1.7928
Epoch 1, 35/38, Current Loss = 1.4992
Epoch 2, 0/38, Current Loss = 0.2789
Epoch 2, 5/38, Current Loss = 1.4934
Epoch 2, 10/38, Current Loss = 1.4920
Epoch 2, 15/38, Current Loss = 1.5313
Epoch 2, 20/38, Current Loss = 1.5133
Epoch 2, 25/38, Current Loss = 1.6368
Epoch 2, 30/38, Current Loss = 1.7158
Epoch 2, 35/38, Current Loss = 1.6021
Epoch 3, 0/38, Current Loss = 0.2863
Epoch 3, 5/38, Current Loss = 1.5551
Epoch 3, 10/38, Current Loss = 1.4448
Epoch 3, 15/38, Current Loss = 1.4910
Epoch 3, 20/38, Current Loss = 1.6684
Epoch 3, 25/38, Current Loss = 1.4256
Epoch 3, 30/38, Current Loss = 1.5601
Epoch 3, 35/38, Current Loss = 1.5700
Epoch 4, 0/38, Current Loss = 0.2840
Epoch 4, 5/38, Current Loss = 1.4857
Epoch 4, 10/38, Curr

In [39]:
evaluate(gtzan, "gtzan")

gtzan: validation - recall: 0.443 precision: 0.506
gtzan: test - recall: 0.477 precision: 0.439
