This is a project implemented by Konstantinos Lampropoulos $$ $$
AM:1115201800092

Imports

In [3]:
from IPython.display import display,Math,Latex
import numpy as np
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn 
import torch.nn.functional as F 
from torch.utils.data import TensorDataset, DataLoader
from torchmetrics.classification import ConfusionMatrix,F1Score

Question 1 : FeedForward Neural Network

Load Datasets

In [4]:
#Load Train set
X_train = np.load('./music_genre_data_di/train/mfccs/X.npy')
Labels_train = np.load('./music_genre_data_di/train/mfccs/labels.npy')

#Load Validation set
X_val = np.load('./music_genre_data_di/val/mfccs/X.npy')
Labels_val = np.load('./music_genre_data_di/val/mfccs/labels.npy')

#Load Test set
X_test = np.load('./music_genre_data_di/test/mfccs/X.npy')
Labels_test = np.load('./music_genre_data_di/test/mfccs/labels.npy')

Encode the labels

In [5]:
encoder  = LabelEncoder()

Labels_train_encoded = encoder.fit_transform(Labels_train)

Labels_val_encoded = encoder.fit_transform(Labels_val)

Labels_test_encoded = encoder.fit_transform(Labels_test)

Create torch Datasets

In [6]:
batch_size = 16
#Train set
X_train_tensor = torch.tensor(X_train,dtype=torch.float32)
Labels_train_tensor = torch.tensor(Labels_train_encoded,dtype=torch.long)
Dataset = TensorDataset(X_train_tensor,Labels_train_tensor)
Train_set = DataLoader(Dataset,batch_size=batch_size,shuffle=True)

#Val set
X_val_tensor = torch.tensor(X_val,dtype=torch.float32)
Labels_val_tensor = torch.tensor(Labels_val_encoded,dtype=torch.long)
Dataset = TensorDataset(X_val_tensor,Labels_val_tensor)
Validation_set = DataLoader(Dataset,batch_size=batch_size,shuffle=True)

#Test set
X_test_tensor = torch.tensor(X_test,dtype=torch.float32)
Labels_test_tensor = torch.tensor(Labels_test_encoded,dtype=torch.long)
Dataset = TensorDataset(X_test_tensor,Labels_test_tensor)
Test_set = DataLoader(Dataset,batch_size=batch_size)

Set CPU or GPU

In [7]:
if torch.cuda.is_available():
   device="cuda"
else:
   device="cpu"
print("Device =",device)

Device = cuda


Create Feed Forward Neural Network Class

In [8]:
class FeedForwardNeuralNetwork(nn.Module):
    def __init__(self):
        super(FeedForwardNeuralNetwork, self).__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(26,128),
            nn.ReLU(),
            nn.Linear(128,32),
            nn.ReLU(),
            nn.Linear(32,4),
        )
    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits
    
model = FeedForwardNeuralNetwork().to(device)

In [60]:
def Train(dataLoader,model,loss_fn,optimizer,scheduler1 = None,scheduler2 = None):
    size = len(dataLoader.dataset)
    for batch, (X,y) in enumerate(dataLoader):
        #Load to GPU
        X = X.to(device)
        y = y.to(device)

        #Prediction and loss
        pred = model(X)
        loss = loss_fn(pred,y)

        #BackPropagation

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
    if scheduler1:
        scheduler1.step()
    if scheduler2:
        scheduler2.step()


In [10]:
def Test(dataloader,model,loss_fn):
    size = len(dataloader.dataset)
    test_loss,correct,f1 = 0,0,0

    with torch.no_grad():
        for X,y in dataloader:
            #Load to GPU
            X = X.to(device)
            y = y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred,y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            f1_score = F1Score(task='multiclass',num_classes=4,average='macro').to(device)
            f1 += f1_score(pred.argmax(1),y)

    test_loss /= size
    correct /= size
    f1 /= size
    confmat = ConfusionMatrix('multiclass',num_classes=4).to(device)
    confusion_matrix = confmat(pred.argmax(1),y)
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f},Avg F1-Score: {f1:>8f}\n")
    print(f"Confusion Matrix \n: {confusion_matrix}\n")

In [11]:
def Validation(dataloader,model):
    size = len(dataloader.dataset)
    f1 = 0

    with torch.no_grad():
        for X,y in dataloader:
            #Load to GPU
            X = X.to(device)
            y = y.to(device)
            pred = model(X)
            f1_score = F1Score(task='multiclass',num_classes=4,average='macro').to(device)
            f1 += f1_score(pred.argmax(1),y)
    f1 /= size
    return f1

In [None]:
loss_fn = nn.CrossEntropyLoss()
best_f1 = 0
learning_rate = 0.002

optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)

epochs_count = 30
for t in range(epochs_count):
    print(f"Epoch {t+1}\n-------------------------------")
    Train(Train_set,model,loss_fn,optimizer)
    f1 = Validation(Validation_set,model)
    if f1 >= best_f1:
        best_f1 = f1
        best_model = model
    Test(Test_set,model,loss_fn)
print("Best Model computed by finding the model with the highest f1 score on the validation set\n")
Test(Test_set,best_model,loss_fn)

GPU vs CPU runtime

* Με την χρήση CPU για την αξιολόγηση του μοντέλου πετυχαίνουμε 60.6% accuracy,Loss 0.060977 και f1  0.011394 σε 6.5s
* Με την χρήση GPU για την αξιολόγιση του μοντέλου πετυχαίνουμε 62.4% accuracy,Loss 0.061011 και f1  0.011620 σε 22.4s

Question 2 : Convolutional Neural Network

Set Seed

In [343]:
import random
from numpy.random import MT19937
from numpy.random import RandomState, SeedSequence
import torch.backends.cudnn

SEED = 12345
rs = RandomState(MT19937(SeedSequence(SEED)))
random.seed(SEED)
torch.manual_seed(SEED)
g_cuda = torch.Generator(device=device)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

Seed Worker

In [344]:
def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

Load the datasets

In [345]:
#Load Train set
X_train = np.load('./music_genre_data_di/train/melgrams/X.npy')
Labels_train = np.load('./music_genre_data_di/train/melgrams/labels.npy')

#Load Validation set
X_val = np.load('./music_genre_data_di/val/melgrams/X.npy')
Labels_val = np.load('./music_genre_data_di/val/melgrams/labels.npy')

#Load Test set
X_test = np.load('./music_genre_data_di/test/melgrams/X.npy')
Labels_test = np.load('./music_genre_data_di/test/melgrams/labels.npy')

Encode the labels

In [346]:
encoder  = LabelEncoder()

Labels_train_encoded = encoder.fit_transform(Labels_train)

Labels_val_encoded = encoder.fit_transform(Labels_val)

Labels_test_encoded = encoder.fit_transform(Labels_test)

Create Torch Datasets

In [347]:
batch_size = 8
g = torch.Generator()
g.manual_seed(0)

#Train set
X_train_tensor = torch.tensor(X_train,dtype=torch.float32)
X_train_tensor = X_train_tensor.unsqueeze(1)
Labels_train_tensor = torch.tensor(Labels_train_encoded,dtype=torch.long)
Dataset = TensorDataset(X_train_tensor,Labels_train_tensor)
Train_set = DataLoader(Dataset,batch_size=batch_size,worker_init_fn=seed_worker,generator=g)

#Val set
X_val_tensor = torch.tensor(X_val,dtype=torch.float32)
X_val_tensor = X_val_tensor.unsqueeze(1)
Labels_val_tensor = torch.tensor(Labels_val_encoded,dtype=torch.long)
Dataset = TensorDataset(X_val_tensor,Labels_val_tensor)
Validation_set = DataLoader(Dataset,batch_size=batch_size,worker_init_fn=seed_worker,generator=g)

#Test set
X_test_tensor = torch.tensor(X_test,dtype=torch.float32)
X_test_tensor = X_test_tensor.unsqueeze(1)
Labels_test_tensor = torch.tensor(Labels_test_encoded,dtype=torch.long)
Dataset = TensorDataset(X_test_tensor,Labels_test_tensor)
Test_set = DataLoader(Dataset,batch_size=batch_size,worker_init_fn=seed_worker,generator=g)

Create Convolutional Neural Network Class

In [348]:
class ConvolutionalNeuralNetwork(nn.Module):
    def __init__(self, dropout_rate=0.5):
        super(ConvolutionalNeuralNetwork, self).__init__()
        kernel_size, padding = 5, 2
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=kernel_size,padding=padding)
        self.bn1 = nn.BatchNorm2d(16)
        self.maxpool = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=kernel_size,padding=padding)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=kernel_size,padding=padding)
        self.bn3 = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=kernel_size,padding=padding)
        self.bn4 = nn.BatchNorm2d(128)

        # Fully connected layers
        self.fc1 = nn.Linear(1024, 1024)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(1024, 256)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.fc3 = nn.Linear(256, 32)
        self.dropout3 = nn.Dropout(dropout_rate)
        self.fc4 = nn.Linear(32, 4)

    def forward(self, x):
        x = self.maxpool(F.elu(self.bn1(self.conv1(x))))
        x = self.maxpool(F.elu(self.bn2(self.conv2(x))))
        x = self.maxpool(F.elu(self.bn3(self.conv3(x))))
        x = self.maxpool(F.elu(self.bn4(self.conv4(x))))

        # Flatten the tensor
        x = torch.flatten(x,1)
        # Fully connected layers
        x = F.elu(self.fc1(x))
        x = F.elu(self.fc2(x))
        x = F.elu(self.fc3(x))
        x = self.fc4(x)

        return x
model = ConvolutionalNeuralNetwork(dropout_rate=0.5).to(device)


In [349]:
loss_fn = nn.CrossEntropyLoss()
best_f1 = 0
learning_rate = 2e-3
patience = 3
epochs_count = 60
patience_tol = epochs_count

optimizer = torch.optim.Adagrad(model.parameters(),lr=learning_rate,weight_decay=1e-7)
scheduler1 = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,verbose=True,T_max=100)
scheduler2 = torch.optim.lr_scheduler.ExponentialLR(optimizer,gamma=0.9,verbose=True)
for t in range(epochs_count):
    print(f"Epoch {t+1}\n-------------------------------")
    torch.cuda.synchronize()
    Train(Train_set,model,loss_fn,optimizer,scheduler1,scheduler2)
    f1 = Validation(Validation_set,model)
    if f1 >= best_f1:
        best_f1 = f1
        best_model = model
    else:
        patience+=1
    if patience == patience_tol:
        break
print("Best Model computed by finding the model with the highest f1 score on the validation set\n")
Test(Test_set,best_model,loss_fn)

Adjusting learning rate of group 0 to 2.0000e-03.
Adjusting learning rate of group 0 to 2.0000e-03.
Epoch 1
-------------------------------
loss: 1.354813  [    0/ 3200]
loss: 11.222861  [  800/ 3200]
loss: 9.694426  [ 1600/ 3200]
loss: 8.319596  [ 2400/ 3200]
Adjusting learning rate of group 0 to 1.9995e-03.
Adjusting learning rate of group 0 to 1.7996e-03.
Epoch 2
-------------------------------
loss: 11.757658  [    0/ 3200]
loss: 7.089171  [  800/ 3200]
loss: 7.519851  [ 1600/ 3200]
loss: 6.419196  [ 2400/ 3200]
Adjusting learning rate of group 0 to 1.7982e-03.
Adjusting learning rate of group 0 to 1.6184e-03.
Epoch 3
-------------------------------
loss: 9.046476  [    0/ 3200]
loss: 6.026916  [  800/ 3200]
loss: 6.436285  [ 1600/ 3200]
loss: 5.650746  [ 2400/ 3200]
Adjusting learning rate of group 0 to 1.6164e-03.
Adjusting learning rate of group 0 to 1.4548e-03.
Epoch 4
-------------------------------
loss: 8.185740  [    0/ 3200]
loss: 5.720587  [  800/ 3200]
loss: 6.261148  [ 

GPU vs CPU runtime

* Στο GPU έτρεξε σε 1m1.6s και επέστρεψε βέλτιστο μοντέλο με accuracy 65.6% ,Avg loss: 0.132755,Avg F1-Score: 0.012216
* Στο CPU έτρεξε σε χρόνο 26m25.1s και επέστρεψε βέλτιστο μοντέλο με  Accuracy: 65.0%, Avg loss: 0.152512,Avg F1-Score: 0.012084

Pooling And Padding

* Με max pooling με kernel = 2,padding = 2 η επίδοση αυξήθηκε στο 70.9% σε χρόνο 40.7s

Optimizers:

$
\begin{array}{|c|c|c|}
\hline
\text{Optimizer} & \text{Accuracy} & \text{F1-Score}\\
\hline
\text{SGD} & \text{70.9\%} & \text{0.012768}\\
\hline
\text{Adadelta} & \text{74.5\%} & \text{0.013245}\\
\hline
\text{Adagrad} & \text{75.1\%} & \text{0.013355}\\
\hline
\text{Adam} & \text{70.7\%} & \text{0.013004}\\
\hline
\text{AdamW} & \text{69.5\%} & \text{0.012643}\\
\hline
\text{Adamax} & \text{72.2\%} & \text{0.013016}\\
\hline
\text{ASGD} & \text{72.2\%} & \text{0.013016}\\
\hline
\text{NAdam} & \text{68.3\%} & \text{0.012598}\\
\hline
\text{RAdam} & \text{69.0\%} & \text{0.012522}\\
\hline
\text{RMsprop} & \text{70.9\%} & \text{0.012781}\\
\hline
\text{Rprop} & \text{71.3\%} & \text{0.012988}\\
\hline
\end{array}
$

Optimizers Comparsion

* Την καλύτερη απόδοση την είχε ο αλγόριθμος Adagrad που πέτυχε accuracy 75.1% με F1-Score 0.013355.Γενικά η απόκλιση των optimizers είναι μικρή με το μικρότερο accuracy να είναι του NAdam με 68.3% και το μέγιστο του AdaGrad με 75.1%.Οι αποκλίσεις αυτές εξαρτώνται και από το shuffling που υπαρχει στα train και validation sets αλλά και στο ότι κάποιοι αλγόριθμοι αποδίδουν καλύτερα σε διαφορετικά προβλήματα από κάποιους άλλους.

Question 3 : Improving Performance

Activators

$
\begin{array}{|c|c|c|}
\hline
\text{Activator} & \text{Accuracy} & \text{F1-Score}\\
\hline
\text{ReLu} & \text{32.0\%} & \text{0.005272}\\
\hline
\text{ELU} & \text{38.9\%} & \text{0.006511}\\
\hline
\text{HardShrink} & \text{33.1\%} & \text{0.005456}\\
\hline
\text{HardSigmoid} & \text{23.5\%} & \text{0.003706}\\
\hline
\text{HardTanh} & \text{29.0\%} & \text{0.004536}\\
\hline
\text{RReLU} & \text{30.8\%} & \text{0.005007}\\
\hline
\text{GELU} & \text{32.3\%} & \text{0.005344}\\
\hline
\text{SoftPlus} & \text{29.0\%} & \text{0.004536}\\
\hline
\end{array}
$

Schedulers

$
\begin{array}{|c|c|c|}
\hline
\text{Scheduler} & \text{Accuracy} & \text{F1-Score}\\
\hline
\text{LinearLR} & \text{30.5\%} & \text{0.004912}\\
\hline
\text{PolynomialLR} & \text{29.0\%} & \text{0.004536}\\
\hline
\text{ExponentialLR} & \text{63.7\%} & \text{0.011358}\\
\hline
\text{MultiStepLR} & \text{38.9\%} & \text{0.006511}\\
\hline
\text{CosineAnnealingLR} & \text{63.3\%} & \text{0.011417}\\
\hline
\end{array}
$

Batch Normalization

*  Accuracy: 73.3%, Avg loss: 0.062259,Avg F1-Score: 0.012951

Regularization

* Weight Decay

$
\begin{array}{|c|c|c|}
\hline
\text{Weight Decay} & \text{Accuracy} & \text{F1-Score}\\
\hline
\text{0.5} & \text{29.0\%} & \text{0.004536}\\
\hline
\text{1e-2} & \text{52.3\%} & \text{0.009304}\\
\hline
\text{1e-3} & \text{51.9\%} & \text{0.009138}\\
\hline
\text{1e-4} & \text{61.9\%} & \text{0.010879}\\
\hline
\text{1e-5} & \text{57.6\%} & \text{0.010176}\\
\hline
\text{1e-7} & \text{55.6\%} & \text{0.009886}\\
\hline
\end{array}
$

* Dropout Rate

$
\begin{array}{|c|c|c|}
\hline
\text{Dropout Rate} & \text{Accuracy} & \text{F1-Score}\\
\hline
\text{0.5} & \text{75.4\%} & \text{0.013121}\\
\hline
\text{0.2} & \text{75.4\%} & \text{0.013121}\\
\hline
\text{0.1} & \text{75.4\%} & \text{0.013121}\\
\hline
\end{array}
$

* Weight Decay and Dropout Rate

$
\begin{array}{|c|c|c|c|}
\hline
\text{Weight Decay} & \text{Dropout Rate} & \text{Accuracy} & \text{F1-Score}\\
\hline
\text{1e-4} & \text{0} & \text{63.2\%} & \text{0.011532}\\
\hline
\text{1e-4} & \text{0.1} & \text{63.2\%} & \text{0.011532}\\
\hline
\text{1e-4} & \text{0.2} & \text{63.2\%} & \text{0.011532}\\
\hline
\text{1e-4} & \text{0.5} & \text{63.2\%} & \text{0.011532}\\
\hline
\text{1e-4} & \text{0.7} & \text{63.2\%} & \text{0.011532}\\
\hline
\text{1e-4} & \text{1.0} & \text{63.2\%} & \text{0.011532}\\
\hline
\end{array}
$

Batch Size

$
\begin{array}{|c|c|c|c|}
\hline
\text{Batch Size} & \text{Accuracy} & \text{F1-Score} & \text{Time}\\
\hline
\text{2} & \text{66.6\%} & \text{0.089510} & \text{9m26.8s}\\
\hline
\text{4} & \text{71.7\%} & \text{0.048821} & \text{5m25.7s}\\
\hline
\text{8} & \text{72.3\%} & \text{0.024948} & \text{2m50.1s}\\
\hline
\text{16} & \text{63.2\%} & \text{0.011532} & \text{1m17.9s}\\
\hline
\text{32} & \text{53.3\%} & \text{0.005186} & \text{39.9s}\\
\hline
\text{64} & \text{53.9\%} & \text{0.015037} & \text{22.7s}\\
\hline
\text{128} & \text{51.3\%} & \text{0.001545} & \text{13.5s}\\
\hline
\end{array}
$

* Παρατηρούμε ότι όσο αυξάνεται το batch size μέχρι το 8 ,αυξάνεται και το accuracy ,ενώ υποδιπλασιάζονται τα F1-score και ο χρόνος εκτέλεσης.Υπό τις βέλτιστες συνθήκες το καλύτερο Batch Size θα ήταν 4(δηλαδή χωρίς να λάβουμε υπόψιν τον χρόνο εκτέλεσης).Όμως μιάς και ο χρόνος εκτέλεσης μας νοιάζει,το βέλτιστο batch size είναι το 8.Πέραν αυτού μειώνεται και το accuracy κατα πολύ,και το χρονικό κέρδος δεν αξίζει για την μείωση της απόδοσης του μοντέλου.

Early Stopping

$
\begin{array}{|c|c|c|c|}
\hline
\text{Patience} & \text{Accuracy} & \text{F1-Score} & \text{Time}\\
\hline
\text{3} & \text{73.2\%} & \text{0.012941} & \text{43.3s}\\
\hline
\text{5} & \text{73.1\%} & \text{0.012935} & \text{44.9s}\\
\hline
\text{6} & \text{73.8\%} & \text{0.012984} & \text{49.3s}\\
\hline
\text{7} & \text{74.2\%} & \text{0.013015} & \text{49.8s}\\
\hline
\text{10} & \text{72.7\%} & \text{0.012781} & \text{54.5s}\\
\hline
\text{15} & \text{72.8\%} & \text{0.012746} & \text{1m2.6s}\\
\hline
\text{None} & \text{75.4\%} & \text{0.013121} & \text{1m20.5s}\\
\hline
\end{array}
$


* Παρατηρώντας τα δεδομένα,αν θέλουμε να θέσουμε ένα patience για το δεδομένο μοντέλο,το βέλτιστο θα ήταν 7.Όμως αφού ο χρόνος ήδη δεν είναι πολύ απαιτητικός θα μπορούσαμε να μην θέσουμε.

Testing

In [351]:
from music_genre_data_di.youtube import youtube_to_melgram

* Inference

In [350]:
def Map_Type(type):
    if type == 0 : type_str = 'blues'
    elif type == 1 : type_str = 'classical'
    elif type == 2 : type_str = 'hiphop'
    else : type_str = 'rock_metal_hardrock'
    return type_str

In [352]:
def TestYT(dataloader,model):
    music_type = []
    music_percentage = {}
    with torch.no_grad():
        for X,y in dataloader:
            X = X.to(device)
            pred = model(X)
            music_type.append(pred.argmax(1))
    for tensor in music_type:
        unique_types, counts = torch.unique(tensor, return_counts=True)
        for type, count in zip(unique_types, counts):
            if type.item() in music_percentage:
                music_percentage[type.item()] += count.item()
            else:
                music_percentage[type.item()] = count.item()
    total_count = sum(music_percentage.values())
    percentages = {type: count / total_count * 100 for type, count in music_percentage.items()}              
    for type, percentage in percentages.items():
        print(f'Type: {Map_Type(type)}, Percentage: {percentage:.2f}%')
    return music_type

In [None]:
youtube_to_melgram(url = 'https://www.youtube.com/watch?v=9E6b3swbnWg',filename="./Classical.wav")
youtube_to_melgram(url = "https://www.youtube.com/watch?v=EDwb9jOVRtU",filename="./HipHop.wav")
youtube_to_melgram(url = "https://www.youtube.com/watch?v=OMaycNcPsHI",filename="./Rock.wav")
youtube_to_melgram(url = "https://www.youtube.com/watch?v=l45f28PzfCI",filename="./Blues.wav")

In [366]:
youtube_to_melgram(url = "https://www.youtube.com/watch?v=V5Ar0dKnl6Y",filename="./Fun.wav")




In [361]:
X = np.load('./Classical.npy')
X_tensor = torch.tensor(X,dtype=torch.float32)
X_tensor = X_tensor.unsqueeze(1)
y_tensor = torch.full((X_tensor.size(0),),0,dtype=torch.long)
tensor_dataset = TensorDataset(X_tensor,y_tensor)
data = DataLoader(tensor_dataset,shuffle=False,batch_size=8)
music = TestYT(data,model)

Type: classical, Percentage: 98.15%
Type: rock_metal_hardrock, Percentage: 1.11%
Type: blues, Percentage: 0.37%
Type: hiphop, Percentage: 0.37%


In [360]:
X = np.load('./HipHop.npy')
X_tensor = torch.tensor(X,dtype=torch.float32)
X_tensor = X_tensor.unsqueeze(1)
y_tensor = torch.full((X_tensor.size(0),),0,dtype=torch.long)
tensor_dataset = TensorDataset(X_tensor,y_tensor)
data = DataLoader(tensor_dataset,shuffle=False,batch_size=8)
music = TestYT(data,model)

Type: hiphop, Percentage: 53.89%
Type: rock_metal_hardrock, Percentage: 29.94%
Type: blues, Percentage: 9.88%
Type: classical, Percentage: 6.29%


In [359]:
X = np.load('./Blues.npy')
X_tensor = torch.tensor(X,dtype=torch.float32)
X_tensor = X_tensor.unsqueeze(1)
y_tensor = torch.full((X_tensor.size(0),),0,dtype=torch.long)
tensor_dataset = TensorDataset(X_tensor,y_tensor)
data = DataLoader(tensor_dataset,shuffle=False,batch_size=8)
music = TestYT(data,model)

Type: blues, Percentage: 22.36%
Type: classical, Percentage: 8.63%
Type: hiphop, Percentage: 14.70%
Type: rock_metal_hardrock, Percentage: 54.31%


In [358]:
X = np.load('./Rock.npy')
X_tensor = torch.tensor(X,dtype=torch.float32)
X_tensor = X_tensor.unsqueeze(1)
y_tensor = torch.full((X_tensor.size(0),),0,dtype=torch.long)
tensor_dataset = TensorDataset(X_tensor,y_tensor)
data = DataLoader(tensor_dataset,shuffle=False,batch_size=8)
music = TestYT(data,model)

Type: classical, Percentage: 4.17%
Type: rock_metal_hardrock, Percentage: 64.35%
Type: blues, Percentage: 26.39%
Type: hiphop, Percentage: 5.09%


* Από τα παραπάνω αποτελέσματα φαίνεται ότι το δίκτυο αναγνώρισε σωστά το είδος μουσικής του Nocturne op.9 No.2 του Choplin,αφού το προσδιόρισε κατα 98% Classical
* Προσδιόρισε επίσης σωστά το τραγούδι της Madonna,Hung up,ως Hip-Hop κατά 53.89%.
* Προσδιόρισε λάθος το τραγούδι του BB King,How Blue Can You Get,τ'οποίο εν τέλει το έθεσε στην κατηγορία Rock,ενώ το τραγούδι είναι Blues.
* Τέλος το τραγούδι των Placebo ,Every You Every Me,το προσδιόρισε ορθά ως Rock κατά 64.35% .

Λεπτομέρειες

* Οι υπολογισμοί σε όλα τα ερωτήματα έγιναν σε local PC ,με specs CPU: AMD R9 3900x και GPU : NVIDIA RTX 3080,επομένως οι χρόνοι εκτέλεσης μπορεί να διαφέρουν αρκετά από τους χρόνους εκτέλεσης του Notebook στο Colab.