**Βήμα 9:** CNN 2d με τις προδιαγραφές που ζητήθηκαν.
Ζητείται να εκπαιδευτεί και στο validation και να αξιολογηθεί μόνο στο test.

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


In [2]:
import numpy as np
import gzip
import copy
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset
from torch.utils.data import SubsetRandomSampler, DataLoader
import os

class_mapping = {
    'Rock': 'Rock',
    'Psych-Rock': 'Rock',
    'Indie-Rock': None,
    'Post-Rock': 'Rock',
    'Psych-Folk': 'Folk',
    'Folk': 'Folk',
    'Metal': 'Metal',
    'Punk': 'Metal',
    'Post-Punk': None,
    'Trip-Hop': 'Trip-Hop',
    'Pop': 'Pop',
    'Electronic': 'Electronic',
    'Hip-Hop': 'Hip-Hop',
    'Classical': 'Classical',
    'Blues': 'Blues',
    'Chiptune': 'Electronic',
    'Jazz': 'Jazz',
    'Soundtrack': None,
    'International': None,
    'Old-Time': None
}


def torch_train_val_split(
        dataset, batch_train, batch_eval,
        val_size=.2, shuffle=True, seed=42):
    # Creating data indices for training and validation splits:
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    val_split = int(np.floor(val_size * dataset_size))
    if shuffle:
        np.random.seed(seed)
        np.random.shuffle(indices)
    train_indices = indices[val_split:]
    val_indices = indices[:val_split]

    # Creating PT data samplers and loaders:
    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)

    train_loader = DataLoader(dataset,
                              batch_size=batch_train,
                              sampler=train_sampler)
    val_loader = DataLoader(dataset,
                            batch_size=batch_eval,
                            sampler=val_sampler)
    return train_loader, val_loader


def read_spectrogram(spectrogram_file, chroma=True):
    with gzip.GzipFile(spectrogram_file, 'r') as f:
        spectrograms = np.load(f)
    # spectrograms contains a fused mel spectrogram and chromagram
    # Decompose as follows
    return spectrograms.T


class LabelTransformer(LabelEncoder):
    def inverse(self, y):
        try:
            return super(LabelTransformer, self).inverse_transform(y)
        except:
            return super(LabelTransformer, self).inverse_transform([y])

    def transform(self, y):
        try:
            return super(LabelTransformer, self).transform(y)
        except:
            return super(LabelTransformer, self).transform([y])

        
class PaddingTransform(object):
    def __init__(self, max_length, padding_value=0):
        self.max_length = max_length
        self.padding_value = padding_value

    def __call__(self, s):
        if len(s) == self.max_length:
            return s

        if len(s) > self.max_length:
            return s[:self.max_length]

        if len(s) < self.max_length:
            s1 = copy.deepcopy(s)
            pad = np.zeros((self.max_length - s.shape[0], s.shape[1]), dtype=np.float32)
            s1 = np.vstack((s1, pad))
            return s1

        
class SpectrogramDataset(Dataset):
    def __init__(self, path, class_mapping=None, train=True, max_length=-1):
        t = 'train' if train else 'test'
        p = os.path.join(path, t)
        self.index = os.path.join(path, "{}_labels.txt".format(t))
        #print(self.index)
        self.files, labels = self.get_files_labels(self.index, class_mapping)
        self.feats = [read_spectrogram(os.path.join(p, f)) for f in self.files]
        self.feat_dim = self.feats[0].shape[1]
        self.lengths = [len(i) for i in self.feats]
        self.max_length = max(self.lengths) if max_length <= 0 else max_length
        self.zero_pad_and_stack = PaddingTransform(self.max_length)
        self.label_transformer = LabelTransformer()
        if isinstance(labels, (list, tuple)):
            self.labels = np.array(self.label_transformer.fit_transform(labels)).astype('int64')

    def get_files_labels(self, txt, class_mapping):
        with open(txt, 'r') as fd:
            lines = [l.rstrip().split('\t') for l in fd.readlines()[1:]]
        files, labels = [], []
        for l in lines:
            label = l[1]
            if class_mapping:
                label = class_mapping[l[1]]
            if not label:
                continue
            files.append(l[0])
            labels.append(label)
        return files, labels

    def __getitem__(self, item):
        l = min(self.lengths[item], self.max_length)
        return self.zero_pad_and_stack(self.feats[item]), self.labels[item], l

    def __len__(self):
        return len(self.labels)

In [3]:
BATCH_SZ=32

specs = SpectrogramDataset('../input/data/data/fma_genre_spectrograms/', train=True, class_mapping=class_mapping, max_length=-1)
train_loader, val_loader = torch_train_val_split(specs, BATCH_SZ ,BATCH_SZ, val_size=0)
test_loader = DataLoader(SpectrogramDataset('../input/data/data/fma_genre_spectrograms/', train=False, class_mapping=class_mapping, max_length=-1))

In [4]:
import numpy as np
import torch
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class ConvNet(nn.Module):
    def __init__(self,input_channels,out_channels,kernel_sz,stride,padding, num_classes):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(input_channels, 4, kernel_size=(3,3), stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(4),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(4, 16, kernel_size=(3,3), stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(16 , 32 , kernel_size=(3,3), stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=3, stride=3)
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=(3,3), stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=3, stride=3)
        )
        
        self.dense1= nn.Linear(6720,500) 
        self.dense2 = nn.Linear(500,10)
        
        
    def forward(self, x,lengths):
        #print(x.shape)
        x = x.transpose(1, 2)
        #print(x.shape)
        x.unsqueeze_(1)
        #print(x.shape)
        out1 = self.layer1(x)
        #print(out1.shape)
        out2= self.layer2(out1)
        #print(out2.shape)
        out3= self.layer3(out2)
        #print(out3.shape)
        out4= self.layer4(out3)
        #print(out4.shape)
        
    
        out_flat=out4.reshape(-1,out4.size(1)*out4.size(2)*out4.size(3))
        #print(out_flat.shape)
        
        
        #implementing fully connected layers
        
        hidden_out = self.dense1(out_flat)
        final_out = self.dense2(hidden_out)
        
        return final_out
        
    def last_timestep(self, outputs, lengths, bidirectional=False):
        """
            Returns the last output of the LSTM taking into account the zero padding
        """
        if self.bidirectional:
            forward, backward = self.split_directions(outputs)
            last_forward = self.last_by_index(forward, lengths)
            last_backward = backward[:, 0, :]
            # Concatenate and return - maybe add more functionalities like average
            return torch.cat((last_forward, last_backward), dim=-1)

        else:
            return self.last_by_index(outputs, lengths)

    @staticmethod
    def split_directions(outputs):
        direction_size = int(outputs.size(-1) / 2)
        forward = outputs[:, :, :direction_size]
        backward = outputs[:, :, direction_size:]
        return forward, backward

    @staticmethod
    def last_by_index(outputs, lengths):
        # Index of the last output for each sequence.
        idx = (lengths - 1).view(-1, 1).expand(outputs.size(0),
                                               outputs.size(2)).unsqueeze(1)
        return outputs.gather(1, idx).squeeze()
    

In [5]:
num_epochs=35
kernel_sz=3
input_channels=1
out_channels=1
stride=2
padding=2
num_classes=10


device=torch.device("cuda")

model3 = ConvNet(input_channels,out_channels,kernel_sz,stride,padding ,num_classes)
model3.to(device)


ConvNet(
  (layer1): Sequential(
    (0): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(4, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
  )
  (layer4): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    

In [6]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model3.parameters(),lr=0.01)



for epoch in range(num_epochs):
    #no need to set requires_grad=True for parameters(weights) as it done by default. Also for input requires_grad is not
    #always necessary. So we comment the following line.
    #with torch.autograd(): 
    model3.train()
    #scheduler.step()
    running_average_loss = 0

    #train model in each epoch
    for index,instance in enumerate(train_loader):
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
        #features,labels,lengths=instance
        
        features = instance[:][0].to(device)
        labels = instance[:][1].to(device)
        lengths = instance[:][2].to(device)
        features = features.type(torch.FloatTensor).to(device)

        optimizer.zero_grad()
        
        # Step 3. Run our forward pass.
        prediction_vec = model3(features,lengths)
        prediction_vec.to(device)
        #print(prediction_vec.shape)
        # Step 4. Compute the loss, gradients, and update the parameters by
        #  calling optimizer.step()
        loss = criterion(prediction_vec,labels)
        loss.backward(retain_graph=True)
        optimizer.step()

        running_average_loss += loss.detach().item()
    print("Epoch: {} \t \t Training Loss {}".format(epoch, float(running_average_loss) / (index + 1)))

Epoch: 0 	 	 Training Loss 1.9550755072946417
Epoch: 1 	 	 Training Loss 1.7853517548678672
Epoch: 2 	 	 Training Loss 1.6320572242344895
Epoch: 3 	 	 Training Loss 1.5808913626082957
Epoch: 4 	 	 Training Loss 1.494195333898884
Epoch: 5 	 	 Training Loss 1.266007518931611
Epoch: 6 	 	 Training Loss 1.1877651965781435
Epoch: 7 	 	 Training Loss 1.0898732110245588
Epoch: 8 	 	 Training Loss 1.721505875456823
Epoch: 9 	 	 Training Loss 1.5065337852255938
Epoch: 10 	 	 Training Loss 1.5415091873848275
Epoch: 11 	 	 Training Loss 1.2349792122840881
Epoch: 12 	 	 Training Loss 1.022954719523861
Epoch: 13 	 	 Training Loss 0.9279517874325791
Epoch: 14 	 	 Training Loss 0.8711650289901315
Epoch: 15 	 	 Training Loss 0.7430572387290327
Epoch: 16 	 	 Training Loss 1.0745038488139844
Epoch: 17 	 	 Training Loss 0.6759969343061316
Epoch: 18 	 	 Training Loss 0.3477524216860941
Epoch: 19 	 	 Training Loss 0.2592628004208003
Epoch: 20 	 	 Training Loss 0.7926048901799607
Epoch: 21 	 	 Training Loss

In [7]:
"""
model3.eval()
acc = 0
n_samples = 0
with torch.no_grad():
    for index, batch in enumerate(val_loader):
        features = batch[:][0].to(device)
        labels = batch[:][1].to(device)
        lengths = batch[:][2].to(device)
        features = features.type(torch.FloatTensor).to(device)
        #print(features.shape)
        out = model3(features,lengths)
        out = out.to(device)
        out_scores = F.log_softmax(out,dim=1)
        
        value, y_pred = out_scores.max(1)

        acc += (labels == y_pred).sum().detach().item()
        n_samples += features.shape[0]

print("Score for validation set: " ,acc / n_samples)
"""

'\nmodel3.eval()\nacc = 0\nn_samples = 0\nwith torch.no_grad():\n    for index, batch in enumerate(val_loader):\n        features = batch[:][0].to(device)\n        labels = batch[:][1].to(device)\n        lengths = batch[:][2].to(device)\n        features = features.type(torch.FloatTensor).to(device)\n        #print(features.shape)\n        out = model3(features,lengths)\n        out = out.to(device)\n        out_scores = F.log_softmax(out,dim=1)\n        \n        value, y_pred = out_scores.max(1)\n\n        acc += (labels == y_pred).sum().detach().item()\n        n_samples += features.shape[0]\n\nprint("Score for validation set: " ,acc / n_samples)\n'

In [8]:
model3.eval()
acc = 0
n_samples = 0
with torch.no_grad():
    for index, batch in enumerate(test_loader):
        features = batch[:][0].to(device)
        labels = batch[:][1].to(device)
        lengths = batch[:][2].to(device)
        features = features.type(torch.FloatTensor).to(device)
        #print(features.shape)
        out = model3(features,lengths)
        out = out.to(device)
        #print(out.shape)
        out_scores = F.log_softmax(out,dim=0)
        
        value, y_pred = out_scores.max(0)

        acc += (labels == y_pred).sum().detach().item()
        n_samples += features.shape[0]

print("Score for test set: " ,acc / n_samples)

Score for test set:  0.6956521739130435
