In [1]:
import os
import random
import pickle
import torch
import torchvision
import pandas as pd
import numpy as np

In [2]:
import torchvision.transforms as transforms

In [3]:
# Ask PyTorch whether a CUDA device is usable; the boolean is shown as this cell's output.
torch.cuda.is_available()

True

In [4]:
# Prefer the first CUDA device when one is visible; otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [5]:
# Pin every random number generator used in this notebook to the same seed (123)
# and ask cuDNN for deterministic kernels, so repeated runs are comparable.
torch.backends.cudnn.deterministic = True
torch.manual_seed(123)
torch.cuda.manual_seed(123)
np.random.seed(123)
random.seed(123)

In [6]:
class Flatten(torch.nn.Module):
    """Reshape a batch to 2-D: dimension 0 is kept, all remaining dimensions are merged."""

    def forward(self, x):
        """Return a view of ``x`` with shape ``(x.shape[0], -1)``."""
        batch_dim = x.shape[0]
        return x.view(batch_dim, -1)

In [31]:
from torch import nn
class MyModel(torch.nn.Module):
    """Residual CNN over single-channel inputs with an 8-output dense head.

    Layout, as given by the layer definitions below:
      * ``conv0`` uses a (128, 3) kernel with no vertical padding, so the
        height axis shrinks by 127 rows -- presumably the input has exactly
        128 rows, collapsing it to height 1 (TODO confirm against the data).
      * Four residual stages: ``conv*`` and ``skip*`` are applied to the same
        input, summed, then passed through ``sum*`` (BatchNorm + ReLU).
        Stage 1 keeps 64 channels; stages 2-4 use stride (1, 2) along the
        width and grow the channels 64 -> 128 -> 256 -> 512.
      * ``global_pooling`` averages over a fixed (1, 156) window and
        ``dense`` maps 512 features to 8 outputs.

    Attribute names and construction order are kept unchanged so existing
    state dicts / pickles of this class keep working.
    """

    def __init__(self):
        super().__init__()

        # Stem: full-height (128, 3) kernel, width preserved by padding (0, 1).
        self.conv0 = nn.Sequential(
            nn.Conv2d(1, 64, (128, 3), padding=(0, 1)),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )

        # Stage 1 (64 -> 64 channels, no striding).
        self.conv1 = nn.Sequential(
            nn.Conv2d(64, 64, (1, 3), padding=(0,1)),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, (1, 3), padding=(0,1)),
            nn.BatchNorm2d(64)
        )

        self.skip1 = nn.Sequential(
            nn.Conv2d(64, 64, (1, 1)),
            nn.BatchNorm2d(64)
        )

        self.sum1 = nn.Sequential(
            nn.BatchNorm2d(64),
            nn.ReLU()
        )

        # Stage 2 (64 -> 128 channels, stride 2 along the width in both branches).
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 128, (1, 3), stride=(1,2), padding=(0,1)),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, (1, 3), padding=(0,1)),
            nn.BatchNorm2d(128)
        )

        self.skip2 = nn.Sequential(
            nn.Conv2d(64, 128, (1, 1), stride=(1,2)),
            nn.BatchNorm2d(128)
        )

        self.sum2 = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.ReLU()
        )

        # Stage 3 (128 -> 256 channels, stride 2 along the width).
        self.conv3 = nn.Sequential(
            nn.Conv2d(128, 256, (1, 3), stride=(1,2), padding=(0,1)),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 256, (1, 3), padding=(0,1)),
            nn.BatchNorm2d(256)
        )

        self.skip3 = nn.Sequential(
            nn.Conv2d(128, 256, (1, 1), stride=(1,2)),
            nn.BatchNorm2d(256)
        )

        self.sum3 = nn.Sequential(
            nn.BatchNorm2d(256),
            nn.ReLU()
        )

        # Stage 4 (256 -> 512 channels, stride 2 along the width).
        self.conv4 = nn.Sequential(
            nn.Conv2d(256, 512, (1, 3), stride=(1,2), padding=(0,1)),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(512, 512, (1, 3), padding=(0,1)),
            nn.BatchNorm2d(512)
        )

        self.skip4 = nn.Sequential(
            nn.Conv2d(256, 512, (1, 1), stride=(1,2)),
            nn.BatchNorm2d(512)
        )

        self.sum4 = nn.Sequential(
            nn.BatchNorm2d(512),
            nn.ReLU()
        )

        # Fixed pooling window of 156 columns -- presumably the width left
        # after the three strided stages for this dataset (TODO confirm).
        self.global_pooling = nn.AvgPool2d((1, 156))

        self.dense = nn.Linear(512, 8)


    def forward(self, x, return_embedding=True):
        """Run the network and return either pooled features or dense outputs.

        Args:
            x: input batch with one channel (``conv0`` is ``Conv2d(1, ...)``).
            return_embedding: when True (the default, which keeps the
                behaviour this method had before the flag existed) return the
                flattened output of ``global_pooling``; the dense layer is
                then skipped instead of being computed and thrown away.
                When False, return ``self.dense`` applied to those features.

        Returns:
            A 2-D tensor whose first dimension is ``x.shape[0]``. With
            ``return_embedding=False`` the second dimension is 8; otherwise it
            is the flattened pooled feature size (512 when the pooled map is
            1x1, which is what ``dense`` requires).
        """
        x0 = self.conv0(x)
        x1 = self.sum1(self.conv1(x0) + self.skip1(x0))
        x2 = self.sum2(self.conv2(x1) + self.skip2(x1))
        x3 = self.sum3(self.conv3(x2) + self.skip3(x2))
        x4 = self.sum4(self.conv4(x3) + self.skip4(x3))
        x5 = self.global_pooling(x4)

        embedding = x5.view(x5.shape[0], -1)
        if return_embedding:
            return embedding
        return self.dense(embedding)

In [32]:
# SECURITY: pickle.load can execute arbitrary code from the file -- only load
# checkpoints you created yourself or otherwise trust.
with open('../data/pickles/models/custom_resnet_model_55.p', 'rb') as f:
    model = pickle.load(f)

# Every inference loop below moves its inputs to `device`, so put the model there
# explicitly instead of relying on wherever it lived when it was pickled, and
# switch it to inference mode once up front.
model = model.to(device).eval()

In [30]:
# NOTE: this attempt fails with the TypeError shown below -- the loaded MyModel
# object is not subscriptable, so it cannot be sliced to drop its last layer.
model[:-1]

TypeError: 'MyModel' object is not subscriptable

In [29]:
# Chain every direct child module of `model` except the last one.
# NOTE(review): MyModel.forward adds the conv*/skip* branches together; running
# the children back-to-back like this does not reproduce that computation.
child_modules = list(model.children())
newmodel = torch.nn.Sequential(*child_modules[:-1])
newmodel

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 64, kernel_size=(128, 3), stride=(1, 1), padding=(0, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (1): Sequential(
    (0): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (2): Sequential(
    (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (3): Sequential(
    (0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): ReLU()
  )
  (4): Sequential(
    (0): Conv2d(64, 128, kernel_size=(1, 3), stride=(1, 2), padding=(0, 1))
    (1): BatchNorm2d(12

In [10]:
# Metadata lookup used further down as main_dict[<int id>]['genre'].
# pickle.load runs code embedded in the file, so only use a trusted pickle here.
with open('../data/pickles/main_dict.pickle', mode='rb') as dict_file:
    main_dict = pickle.load(dict_file)

In [11]:
class ImageFolderWithPaths(torchvision.datasets.ImageFolder):
    """ImageFolder variant whose items also carry the source file path."""

    def __getitem__(self, index):
        """Return the parent's item tuple with the image path appended as the last element."""
        base_item = super().__getitem__(index)
        file_path = self.imgs[index][0]
        return base_item + (file_path,)

In [12]:
# Root folders of the three image splits, relative to this notebook.
train_dir, val_dir, test_dir = (
    '../data/spectrograms/train/train/',
    '../data/spectrograms/train/val/',
    '../data/spectrograms/test/',
)

In [13]:
# Preprocessing shared by every split: convert to one grayscale channel, turn the
# image into a tensor, then normalise with fixed single-channel mean/std values
# (presumably dataset statistics -- TODO confirm where 0.0069 / 0.0033 come from).
one_transform = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.0069], std=[0.0033]),
    ]
)

batch_size = 4

train_dataset = ImageFolderWithPaths(root=train_dir, transform=one_transform)
val_dataset = ImageFolderWithPaths(root=val_dir, transform=one_transform)

# Neither loader shuffles, so batches come out in dataset order.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
    num_workers=8,
)
val_dataloader = torch.utils.data.DataLoader(
    dataset=val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=8,
)

# Class labels as discovered by ImageFolder for the training split.
class_names = train_dataset.classes

In [14]:
# Test split: same preprocessing and batch size as train/val, read in dataset order.
test_dataset = ImageFolderWithPaths(root=test_dir, transform=one_transform)
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=8,
)

In [15]:
def predict(model, test_loader):
    """Run ``model`` over ``test_loader`` and return softmax-normalised outputs.

    Args:
        model: a ``torch.nn.Module``; it is switched to eval mode and left
            in that mode.
        test_loader: iterable yielding ``(inputs, label, path)`` triples;
            only ``inputs`` is used.

    Returns:
        NumPy array with one row per sample (batches concatenated in loader
        order); each row is the softmax over the model's last output dimension.

    Raises:
        Whatever ``torch.cat`` raises for an empty list if ``test_loader``
        yields no batches.
    """
    # eval() does not depend on the batch, so set it once rather than per iteration.
    model.eval()

    # Send inputs to wherever the model's weights live instead of depending on a
    # notebook-level `device` variable; a parameter-free model gets inputs as-is.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    logits = []
    with torch.no_grad():
        for inputs, _labels, _paths in test_loader:
            if target_device is not None:
                inputs = inputs.to(target_device)
            logits.append(model(inputs).cpu())

    probs = torch.nn.functional.softmax(torch.cat(logits), dim=-1).numpy()
    return probs

In [16]:
# Turn the per-sample output rows into class names by taking each row's argmax.
# NOTE(review): this assumes every row of `probs` has one entry per item of
# `class_names`; MyModel.forward as written in this notebook returns the
# flattened pooled features, not the dense layer's output -- confirm the model
# used here really emits class scores.
probs = predict(model, test_dataloader)
preds = [class_names[class_idx] for class_idx in probs.argmax(axis=1)]

In [17]:
# Look up the ground-truth genre of every test image from its file name.
#  * Iterate over `test_dataset.imgs` (the same (path, class_index) list that
#    ImageFolderWithPaths.__getitem__ reads the path from) so no image has to be
#    opened and transformed just to learn its path.
#  * Use basename/splitext rather than a fixed '/'-split index (which depends on
#    how deep the data directory is) and `.strip('.png')` (which removes any of
#    the characters '.', 'p', 'n', 'g' from both ends, not the '.png' suffix).
true_class = []
for img_path, _class_idx in test_dataset.imgs:
    track_id = os.path.splitext(os.path.basename(img_path))[0]
    true_class.append(main_dict[int(track_id)]['genre'])

In [18]:
# Accuracy: fraction of test samples whose predicted class equals the true genre.
np.mean(np.asarray(true_class) == np.asarray(preds))

0.5979381443298969

In [19]:
from sklearn.metrics import f1_score
# Weighted F1 over the same labels and predictions used for the accuracy above.
f1_score(y_true=true_class, y_pred=preds, average='weighted')

0.6068307662518424

In [33]:
# Smoke test: send only the first test batch through the model and keep its
# flattened output in `outputs` / `logits` for inspection in the next cells.
logits = []
with torch.no_grad():
    for inputs, _labels, path in test_dataloader:
        inputs = inputs.to(device)
        model.eval()
        outputs = model(inputs).cpu()
        # Collapse everything after the batch dimension into a single axis.
        outputs = outputs.view(outputs.size(0), -1)
        logits.append(outputs)
        break  # one batch is enough here

In [34]:
# Shape of the single batch kept by the previous cell (already flattened to 2-D there).
outputs.shape

torch.Size([4, 512])

In [35]:
# Inspect the row at index 3 of that batch (the full vector is printed below).
outputs[3]

tensor([1.3533e+00, 7.6774e-02, 2.3122e-01, 2.0391e-01, 1.1807e-02, 2.2045e-01,
        1.9637e+00, 6.6312e-01, 2.4221e-01, 3.9555e-01, 1.5470e+00, 6.9158e-02,
        1.9168e-01, 2.7678e-01, 1.3720e+00, 4.5506e-01, 6.8681e-01, 7.7914e-01,
        4.2933e-01, 5.5080e-01, 5.2408e-02, 1.1993e-01, 1.8741e-01, 1.1234e+00,
        1.2587e-01, 2.1183e-01, 5.3860e-01, 8.9890e-01, 1.5663e-01, 9.1550e-01,
        3.2602e-01, 1.2392e+00, 2.5497e-01, 2.3628e-01, 4.3947e-02, 2.9166e-01,
        3.4358e-01, 3.7946e-01, 1.4876e+00, 1.2251e-01, 2.2617e-01, 7.0939e-01,
        1.0598e+00, 9.8894e-01, 1.2447e+00, 1.3121e-02, 7.0357e-01, 4.5590e-01,
        2.0090e-01, 3.1472e-01, 8.2325e-02, 3.5095e-01, 2.8994e-02, 1.2500e-01,
        1.0103e+00, 8.8144e-02, 1.3596e-01, 7.3645e-02, 1.4827e-01, 3.2517e-01,
        1.6554e-01, 3.4745e-01, 3.7581e-01, 3.2449e-01, 8.9321e-03, 1.8314e-01,
        4.0948e-01, 2.4520e-01, 4.7574e-01, 1.0919e+00, 1.4649e+00, 2.3113e-02,
        1.0937e-01, 4.8258e-01, 1.0302e-

In [23]:
# NOTE: fails with the TypeError shown below -- the MyModel object is not
# subscriptable, so `flat_model` is never actually created by this cell.
flat_model = model[:-2]

TypeError: 'MyModel' object is not subscriptable

In [36]:
# Embedding pass over the TEST split.
# Starts fresh accumulators; the val and train cells that follow append to these
# same two lists, so the final order is test, val, train.
logits = []
path_list = list()

model.eval()  # loop-invariant: set inference mode once, not on every batch
with torch.no_grad():
    for inputs, _labels, path in test_dataloader:
        inputs = inputs.to(device)
        outputs = model(inputs).cpu()
        # One flat feature row per sample.
        outputs = outputs.view(outputs.shape[0], -1)
        logits.append(outputs)
        # `path` holds the file paths of this batch; stored per batch, in step with `logits`.
        path_list.append(path)

In [37]:
# Embedding pass over the VALIDATION split.
# Deliberately does NOT reset `logits` / `path_list`: it appends to the lists
# created by the test-split cell, so it must run after that cell and exactly
# once (running it again appends the same batches a second time).
model.eval()  # loop-invariant: set inference mode once, not on every batch
with torch.no_grad():
    for inputs, _labels, path in val_dataloader:
        inputs = inputs.to(device)
        outputs = model(inputs).cpu()
        # One flat feature row per sample.
        outputs = outputs.view(outputs.shape[0], -1)
        logits.append(outputs)
        path_list.append(path)

In [38]:
# Embedding pass over the TRAIN split.
# Deliberately does NOT reset `logits` / `path_list`: it appends to the lists
# already holding the test and val batches, so it must run after those cells and
# exactly once (running it again appends the same batches a second time).
model.eval()  # loop-invariant: set inference mode once, not on every batch
with torch.no_grad():
    for inputs, _labels, path in train_dataloader:
        inputs = inputs.to(device)
        outputs = model(inputs).cpu()
        # One flat feature row per sample.
        outputs = outputs.view(outputs.shape[0], -1)
        logits.append(outputs)
        path_list.append(path)

In [39]:
# path_list receives one entry per batch, so this counts batches, not samples.
len(path_list)

2000

In [40]:
# First file path of the first collected batch.
path_list[0][0]

'../data/spectrograms/test/unknown/000190.png'

In [41]:
# Feature row stored at the same position (first batch, first sample) as the path above.
logits[0][0]

tensor([4.2660e-01, 9.4770e-01, 4.0977e-02, 1.1206e-01, 7.0712e-01, 1.1625e+00,
        8.0353e-01, 6.7246e-02, 1.1167e-01, 7.2490e-01, 9.4193e-01, 3.1589e+00,
        1.7728e+00, 6.7006e-03, 7.4222e-02, 8.1362e-02, 1.0331e+00, 6.4309e-01,
        1.5609e-01, 1.0863e-01, 3.0952e-01, 1.1050e-01, 2.0864e+00, 2.7958e+00,
        8.9385e-01, 4.9957e-01, 2.3699e+00, 8.8438e-01, 1.4698e+00, 4.6339e-03,
        4.2226e-02, 7.0422e-02, 1.1675e+00, 3.7796e-01, 1.4244e+00, 1.1887e+00,
        9.1567e-01, 6.1085e-01, 5.3914e-01, 3.3007e-02, 1.0496e+00, 0.0000e+00,
        1.7774e+00, 1.4602e+00, 9.1238e-01, 2.5805e-01, 1.1028e+00, 1.5210e+00,
        2.2665e+00, 6.4750e-01, 2.1433e-01, 8.4579e-01, 9.1978e-01, 1.3228e+00,
        1.3943e+00, 5.5475e-01, 3.1136e+00, 2.4588e+00, 5.6724e-01, 5.0780e-01,
        1.3105e-02, 6.7418e-01, 3.2605e-01, 1.6718e+00, 3.6741e-03, 2.5260e-01,
        4.9692e-01, 5.5080e-01, 3.3561e-01, 1.2929e+00, 1.3430e-01, 6.5861e-02,
        3.0020e+00, 1.9675e+00, 5.1272e-

In [42]:
# Unroll the per-batch tensors into one NumPy row per sample, keeping batch order.
emb_list = [sample_row.numpy() for batch_rows in logits for sample_row in batch_rows]

In [43]:
# One DataFrame row per sample, one column per feature (columns are labelled by position).
embedings = pd.DataFrame(data=emb_list)

In [44]:
# For every collected path keep the file name up to its first '.', e.g.
# '../data/spectrograms/test/unknown/000190.png' -> '000190'.
# Batches are walked in the same order used to build `emb_list`.
track_list = [
    sample_path.split('/')[-1].split('.')[0]
    for batch_paths in path_list
    for sample_path in batch_paths
]

In [45]:
# Attach the ids as a new column. This relies on `track_list` being in the same
# order as the rows of `embedings`: both come from `logits` / `path_list`,
# which the extraction loops append to together, one batch at a time.
embedings['id'] = track_list

In [46]:
# Display the assembled frame (feature columns followed by 'id').
embedings

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,503,504,505,506,507,508,509,510,511,id
0,0.426598,0.947701,0.040977,0.112058,0.707118,1.162531,0.803530,0.067246,0.111666,0.724896,...,0.136411,2.779022,0.039962,0.190971,3.211765,0.340266,0.552923,0.155349,0.457349,000190
1,0.958111,0.308117,0.088165,0.071058,0.431378,0.324224,0.839473,0.133348,0.122043,0.999581,...,0.297063,1.048719,0.030102,0.299465,2.430354,0.070081,0.408074,0.040798,0.319569,000194
2,2.726996,0.837075,0.141356,0.988398,0.062930,0.058893,0.221997,0.214702,0.170721,0.858936,...,0.247367,0.751735,0.037458,0.535053,1.876652,0.072166,0.499994,0.374152,0.979964,000667
3,1.353337,0.076774,0.231223,0.203908,0.011807,0.220445,1.963683,0.663118,0.242210,0.395554,...,0.022814,0.118767,0.047170,0.037215,1.230592,0.068908,0.184140,0.181901,0.905914,001040
4,1.857120,0.489802,0.093675,0.177646,0.443665,0.318869,0.174736,0.002716,0.147258,0.775616,...,0.272839,2.666928,0.069785,0.346600,2.577690,0.188910,0.500382,0.123293,0.147871,001686
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7989,1.123316,0.257146,0.041416,0.331017,0.009278,0.060962,1.558685,0.512249,0.190467,0.637470,...,0.226283,0.032925,0.083238,0.083157,1.436705,0.035779,0.090603,0.087154,0.878640,149416
7990,0.661392,0.269433,0.034565,0.227549,0.034320,0.272772,1.513012,0.582256,0.098210,0.393443,...,0.080972,0.225576,0.043214,0.089456,1.589537,0.057539,0.578900,0.031669,0.655243,149417
7991,1.626190,0.082549,1.335705,0.973848,0.268110,0.928231,1.823195,1.195325,1.626279,0.178328,...,0.036719,0.128401,0.037347,0.027686,0.201245,0.149208,0.136870,1.800367,1.064121,149452
7992,1.385681,0.296244,0.075858,0.372868,0.048158,0.195493,0.737243,0.087842,0.119013,0.828757,...,0.672356,0.633477,0.091810,0.047381,2.137580,0.119381,0.112560,0.056708,1.155992,149488


In [47]:
# Move 'id' to the front and keep every feature column in its current order.
# The feature columns are read from the frame itself rather than rebuilt from a
# hard-coded width, so this keeps working if the embedding size changes.
feature_columns = [col for col in embedings.columns if col != 'id']
embedings = embedings[['id'] + feature_columns]

In [48]:
# Write the id + feature table to disk without the DataFrame's row index.
embedings.to_csv(path_or_buf='../data/csv/cnn_embedings_custom_resnet.csv', index=False)