In [1]:
import numpy as np

import torch
import torch.utils as utils
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchvision import transforms, datasets

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [3]:
# hyperparameters
batch_size = 32
epochs = 20

In [4]:
train_transform = transforms.Compose([
#     transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor()
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
])

trainset = datasets.ImageFolder("../../data/train/spectrogram/", train_transform)
validationset = datasets.ImageFolder("../../data/validation/spectrogram/", train_transform)
testset = datasets.ImageFolder("../../data/test/spectrogram/", test_transform)

trainloader = utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
validationloader = utils.data.DataLoader(validationset, batch_size=batch_size, shuffle=True)
testloader = utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

In [5]:
class AudioNN(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32*13*40, 1024)
        self.fc2 = nn.Linear(1024, 4)
        
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
#         print(x.shape)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        
        return x
    
    
model = AudioNN()

In [6]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [7]:
def train(model, epochs, trainloader, validationloader, criterion, optimizer, device):
    model.to(device)
    
    for epoch in range(epochs):
        running_loss = 0.0
        vrunning_loss = 0.0
#         best_loss = np.Inf
        correct = 0.0
        vcorrect = 0.0
        model.train()
        for batch, (data, labels) in enumerate(trainloader):
            data, labels = data.to(device), labels.to(device)
            optimizer.zero_grad()
            output = model.forward(data)
            loss = criterion(output, labels)
            running_loss += loss.item()
            loss.backward()
            optimizer.step()
            _, preds = torch.max(output, 1)
            correct += torch.sum(preds == labels.data).item()
        
        model.eval()
        for batch, (data, labels) in enumerate(validationloader):
            data, labels = data.to(device), labels.to(device)
#             optimizer.zero_grad()
            output = model.forward(data)
            loss = criterion(output, labels)
            vrunning_loss += loss.item()
#             loss.backward()
#             optimizer.step()
            _, preds = torch.max(output, 1)
            vcorrect += torch.sum(preds == labels.data).item()
            
            # accuracy
#             if batch%2==best_loss<=running_loss:
#                 best_loss = running_loss
        taccuracy = 100*(correct/len(trainloader.dataset))
        vaccuracy = 100*(vcorrect/len(validationloader.dataset))
        print(f"Epoch: {epoch+1}, Training Loss: {running_loss/len(trainloader.dataset):.4f}, Training Accuracy: {taccuracy:.4f}%\
        Validation Loss: {vrunning_loss/len(validationloader.dataset):.4f}, Validation Accuracy: {vaccuracy:.4f}%")

In [8]:
def test(model, testloader, device="cpu"):
    model.to(device)
    correct = 0.0
    with torch.no_grad():
        for data, labels in testloader:
            output = model.forward(data)
            _, preds = torch.max(output, 1)
            correct += torch.sum(preds == labels.data).item()
    accuracy = 100*(correct/len(testloader.dataset))
    print(f"Training Accuracy: {accuracy}%")

In [9]:
train(model, epochs, trainloader, validationloader, criterion, optimizer, device)
test(model, testloader)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch: 1, Training Loss: 0.0428, Training Accuracy: 33.8174%        Validation Loss: 0.0430, Validation Accuracy: 30.9009%
Epoch: 2, Training Loss: 0.0426, Training Accuracy: 33.7285%        Validation Loss: 0.0435, Validation Accuracy: 30.9009%
Epoch: 3, Training Loss: 0.0425, Training Accuracy: 34.9733%        Validation Loss: 0.0428, Validation Accuracy: 32.8829%
Epoch: 4, Training Loss: 0.0425, Training Accuracy: 35.1215%        Validation Loss: 0.0429, Validation Accuracy: 33.6937%
Epoch: 5, Training Loss: 0.0422, Training Accuracy: 36.4256%        Validation Loss: 0.0429, Validation Accuracy: 33.2432%
Epoch: 6, Training Loss: 0.0420, Training Accuracy: 36.6034%        Validation Loss: 0.0431, Validation Accuracy: 33.7838%
Epoch: 7, Training Loss: 0.0419, Training Accuracy: 36.9591%        Validation Loss: 0.0431, Validation Accuracy: 35.3153%
Epoch: 8, Training Loss: 0.0417, Training Accuracy: 37.6408%        Validation Loss: 0.0431, Validation Accuracy: 34.1441%
Epoch: 9, Traini

In [10]:
from sklearn.metrics import classification_report

model.to("cpu")
actual_class = []
pred_class = []

for (lyrics, label) in testloader:
        label, lyrics = label.to("cpu"), lyrics.to("cpu")
        with torch.no_grad():
            output = model.forward(lyrics)
        
        actual_class+=label.cpu().numpy().squeeze().tolist()
        pred_class+=output.argmax(1).cpu().numpy().squeeze().tolist()
        
print(classification_report(actual_class, pred_class))

              precision    recall  f1-score   support

           0       0.36      0.55      0.44       347
           1       0.19      0.09      0.12       194
           2       0.32      0.27      0.29       275
           3       0.26      0.22      0.24       212

    accuracy                           0.32      1028
   macro avg       0.28      0.28      0.27      1028
weighted avg       0.30      0.32      0.30      1028



In [11]:
torch.save(model.state_dict(), "base.v2.36")