In [108]:
import numpy as np
from BoomboxProcessor import BoomboxProcessor
from train_encoding_model import BoomboxNet
from GenreClassifier import GenreClassifier
import torch.nn as nn
import torch
from tqdm import tqdm
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from torchmetrics.functional import accuracy as torch_acc
from sklearn.metrics import classification_report

In [73]:
# One sub-folder per genre; the loader reports how many trajectories it read from each.
data_folder = [
    "90s_hiphop",
    "90s_rock",
    "2010s_pop",
    "classical",
    "country",
]

boombox = BoomboxProcessor(verbose=True)

# Read the raw trajectories for every genre folder.
boombox.load_trajectories(data_folder)

# Attach the pretrained encoder checkpoint, then run it over the loaded trajectories.
boombox.load_encoding_model('models/model_50000.pt', BoomboxNet)
boombox.encode_trajectories()

# NOTE(review): presumably cuts every encoded trajectory into 10 "songlets"
# (the classifier's first conv layer takes 10 input channels) -- confirm in BoomboxProcessor.
boombox.split_encoded_trajectories(10)

# Model inputs and their genre labels.
trajectories, labels = boombox.get_all_songlet_trajectories()

Loaded 93 trajectories from 90s_hiphop
Loaded 70 trajectories from 90s_rock
Loaded 67 trajectories from 2010s_pop
Loaded 60 trajectories from classical
Loaded 75 trajectories from country


100%|██████████| 93/93 [00:00<00:00, 118.04it/s]
100%|██████████| 70/70 [00:00<00:00, 121.01it/s]
100%|██████████| 67/67 [00:00<00:00, 127.02it/s]
100%|██████████| 60/60 [00:00<00:00, 110.08it/s]
100%|██████████| 75/75 [00:00<00:00, 132.48it/s]


In [74]:
# Turn the genre labels into a dense one-hot matrix (one column per distinct label).
# OneHotEncoder needs a 2-D input, hence the single-column reshape.
one_hot_encoder = OneHotEncoder()
label_column = labels.reshape(-1, 1)
one_hot_encoder.fit(label_column)
labels = one_hot_encoder.transform(label_column).toarray()

In [75]:
# Hold out 15% of the samples for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    trajectories,
    labels,
    test_size=0.15,
    random_state=42,
)

In [76]:
# Cast every split to a float32 torch tensor in one pass.
x_train, y_train, x_test, y_test = (
    torch.tensor(split, dtype=torch.float32)
    for split in (x_train, y_train, x_test, y_test)
)

In [77]:
# Sanity check: column sums of the one-hot test labels give the number of
# held-out samples per genre -- every entry should be non-zero.
test_counts_per_genre = y_test.sum(dim=0)
print(test_counts_per_genre)

tensor([18.,  8.,  7.,  8., 14.])


In [78]:
# Mini-batch iterators over (input, one-hot label) pairs.
# Training batches are reshuffled every epoch. The test loader keeps dataset
# order so that predictions gathered batch-by-batch line up with y_test and
# evaluation is deterministic.
trainDataLoader = DataLoader(list(zip(x_train, y_train)), batch_size=16, shuffle=True)
testDataLoader = DataLoader(list(zip(x_test, y_test)), batch_size=16, shuffle=False)

In [104]:
import torch
import torch.nn as nn

class GenreClassifier(nn.Module):
    """1-D CNN that scores a multi-channel sequence against ``classes`` genres.

    Architecture: five ``Conv1d(kernel_size=3) -> BatchNorm1d -> ReLU ->
    AvgPool1d(2)`` stages (8, 16, 32, 64, 128 output channels), then flatten,
    dropout (p=0.3) and one linear layer.

    ``forward`` returns raw, unnormalised logits. ``nn.CrossEntropyLoss``
    applies log-softmax itself, so feeding it softmax outputs would normalise
    twice and flatten the gradients. Use ``predict_proba`` when probabilities
    are needed; ``argmax`` gives the same class for logits and probabilities.

    Args:
        classes: number of output classes.
        in_channels: number of channels of the input sequence.
        fc_in_features: flattened feature count entering the linear layer.
            The default 2816 = 128 channels * 22 steps, which is what the five
            stages produce for input lengths 766-797 (e.g. 768).
    """

    def __init__(self, classes=5, in_channels=10, fc_in_features=2816):
        super(GenreClassifier, self).__init__()

        # Attribute names are kept stable so existing state_dict checkpoints still load.
        self.conv1 = nn.Conv1d(in_channels, 8, kernel_size=3, stride=1)
        self.bn1 = nn.BatchNorm1d(8)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.AvgPool1d(kernel_size=2)

        self.conv2 = nn.Conv1d(8, 16, kernel_size=3, stride=1)
        self.bn2 = nn.BatchNorm1d(16)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.AvgPool1d(kernel_size=2)

        self.conv3 = nn.Conv1d(16, 32, kernel_size=3, stride=1)
        self.bn3 = nn.BatchNorm1d(32)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.AvgPool1d(kernel_size=2)

        self.conv4 = nn.Conv1d(32, 64, kernel_size=3, stride=1)
        self.bn4 = nn.BatchNorm1d(64)
        self.relu4 = nn.ReLU()
        self.pool4 = nn.AvgPool1d(kernel_size=2)

        self.conv5 = nn.Conv1d(64, 128, kernel_size=3, stride=1)
        self.bn5 = nn.BatchNorm1d(128)
        self.relu5 = nn.ReLU()
        self.pool5 = nn.AvgPool1d(kernel_size=2)

        self.flatten = nn.Flatten()

        self.dropout = nn.Dropout(p=0.3)

        self.fc = nn.Linear(fc_in_features, classes)
        # Only used by predict_proba; forward() deliberately stops at the logits.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class logits of shape (batch, classes) for x of shape (batch, in_channels, length)."""
        x = self.pool1(self.relu1(self.bn1(self.conv1(x))))
        x = self.pool2(self.relu2(self.bn2(self.conv2(x))))
        x = self.pool3(self.relu3(self.bn3(self.conv3(x))))
        x = self.pool4(self.relu4(self.bn4(self.conv4(x))))
        x = self.pool5(self.relu5(self.bn5(self.conv5(x))))

        x = self.flatten(x)
        x = self.dropout(x)

        return self.fc(x)

    def predict_proba(self, x):
        """Return softmax class probabilities (each row sums to 1) for x.

        Does not change train/eval mode or gradient tracking; call
        ``model.eval()`` and wrap in ``torch.no_grad()`` for inference.
        """
        return self.softmax(self.forward(x))


In [105]:
def train(x_train, y_train, x_test, y_test, epochs=100, device="cuda", batch_size=16, lr=0.001):
    """Train a fresh ``GenreClassifier`` and return the fitted model.

    The loaders are built from the arguments, so the function no longer
    depends on notebook-level globals. After each epoch the model is also
    scored on the held-out split.

    Args:
        x_train: training inputs, indexable along the first (sample) dimension.
        y_train: one-hot training labels; the class index is recovered with argmax.
        x_test: held-out inputs, same layout as ``x_train``.
        y_test: held-out one-hot labels.
        epochs: number of passes over the training data.
        device: torch device string; falls back to CPU when CUDA is requested
            but unavailable.
        batch_size: mini-batch size for both loaders.
        lr: Adam learning rate.

    Returns:
        The trained model, left in eval mode on ``device``.
    """
    if str(device).startswith("cuda") and not torch.cuda.is_available():
        print("[WARN] CUDA is not available; training on CPU instead.")
        device = "cpu"

    train_loader = DataLoader(list(zip(x_train, y_train)), batch_size=batch_size, shuffle=True)
    # No shuffling for evaluation: order does not affect the metrics and keeps runs comparable.
    test_loader = DataLoader(list(zip(x_test, y_test)), batch_size=batch_size, shuffle=False)

    model = GenreClassifier().to(device)
    # CrossEntropyLoss applies log-softmax internally and takes class indices as targets.
    # NOTE(review): it expects raw logits -- confirm the model's forward() does not
    # already apply a softmax.
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    n_epochs = int(epochs)
    for epoch in tqdm(range(n_epochs), desc='Training Epochs'):
        model.train()

        total_train_loss = 0.0
        train_correct = 0

        for x, y in train_loader:
            x = x.to(device)
            targets = torch.argmax(y.to(device), 1)

            outputs = model(x)
            loss = criterion(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()
            # .item() keeps the running count a plain int instead of a device tensor.
            train_correct += (torch.argmax(outputs, 1) == targets).sum().item()

        train_loss = total_train_loss / len(train_loader)
        train_acc = train_correct / len(x_train)

        test_loss, test_correct = _evaluate(model, test_loader, criterion, device)
        test_acc = test_correct / max(len(x_test), 1)

        print("[INFO] EPOCH: {}/{}".format(epoch + 1, epochs))
        print("Train loss: {:.6f}, Train accuracy: {:.4f}".format(train_loss, train_acc))
        print("Test loss: {:.6f}, Test accuracy: {:.4f}".format(test_loss, test_acc))

    model.eval()
    return model


def _evaluate(model, loader, criterion, device):
    """Return (mean batch loss, number of correct predictions) over ``loader`` without updating weights."""
    model.eval()
    total_loss = 0.0
    correct = 0
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            targets = torch.argmax(y.to(device), 1)
            outputs = model(x)
            total_loss += criterion(outputs, targets).item()
            correct += (torch.argmax(outputs, 1) == targets).sum().item()
    # max(..., 1) avoids a ZeroDivisionError when the held-out split is empty.
    return total_loss / max(len(loader), 1), correct
        
        

In [None]:
# Keep a handle on the trained network: the save and evaluation cells below read `model`.
model = train(x_train, y_train, x_test, y_test, epochs=100, device="cuda")
assert model is not None, "train() returned nothing; it must return the trained model"

In [107]:
# Persist only the learned weights (state_dict), not the whole module object.
checkpoint_path = 'models/genre_cnn_100.pt'
torch.save(model.state_dict(), checkpoint_path)

In [None]:
# Score the trained model on the held-out set and print per-class metrics.
# Run on whatever device the model's weights live on rather than assuming CUDA.
eval_device = next(model.parameters()).device

# Evaluation mode: dropout is disabled and batch-norm uses its running statistics.
model.eval()

preds = []
targets = []
with torch.no_grad():
    for (x, y) in testDataLoader:
        pred = model(x.to(eval_device))
        preds.extend(pred.argmax(dim=1).cpu().numpy())
        # DataLoader has no `.targets` attribute, and the loader may shuffle, so the
        # ground truth is collected in the same pass to stay aligned with `preds`.
        targets.extend(y.argmax(dim=1).cpu().numpy())

# `labels` pins the report to all five classes so it always matches `target_names`,
# even if a class is missing from the test split or from the predictions.
print(classification_report(np.array(targets), np.array(preds),
                            labels=list(range(5)),
                            target_names=['class_{}'.format(i) for i in range(5)]))