In [4]:
import pandas as pd
import numpy as np

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models

import pickle
from tqdm import tqdm, auto

import os

In [5]:
class AudioDataset(Dataset):
    """Map-style dataset backed by a single pickle file of samples.

    The unpickled object is indexed with integers and must support ``len``;
    each entry is read as a mapping with a ``"values"`` array and a
    ``"target"`` label.

    Args:
        pkl_dir: Path of the pickle file to load (a file path, despite the
            parameter name).
        transforms: Optional callable applied to every sample tensor before
            it is returned.
        length: Size of the last axis every sample is reshaped to; samples
            come out shaped ``(-1, 128, length)``. Defaults to 1500, the
            value that used to be hard-coded.
    """

    def __init__(self, pkl_dir, transforms=None, length=1500):
        self.length = length
        self.transforms = transforms
        # NOTE: pickle.load can execute arbitrary code embedded in the file;
        # only point this at pickle files you produced yourself.
        with open(pkl_dir, "rb") as f:
            self.data = pickle.load(f)

    def __len__(self):
        """Return the number of entries in the loaded pickle."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(values, target)`` for the entry at ``idx``.

        ``values`` is a float tensor reshaped to ``(-1, 128, self.length)``
        (after ``transforms`` if one was given); ``target`` is a LongTensor
        of shape ``(1,)`` holding the entry's label.
        """
        entry = self.data[idx]
        values = entry["values"].reshape(-1, 128, self.length)
        values = torch.Tensor(values)
        # Compare against None explicitly: a callable that defines __len__
        # (e.g. an empty container of transforms) would otherwise be skipped.
        if self.transforms is not None:
            values = self.transforms(values)
        target = torch.LongTensor([entry["target"]])
        return (values, target)

    
def fetch_dataloader(pkl_dir, batch_size, num_workers, shuffle=True, transforms=None):
    """Build a ``DataLoader`` over the ``AudioDataset`` stored at ``pkl_dir``.

    Args:
        pkl_dir: Path of the pickle file handed to ``AudioDataset``.
        batch_size: Number of samples per batch.
        num_workers: Number of worker processes used by the loader.
        shuffle: Whether to reshuffle the data every epoch. Defaults to
            ``True`` (the previous fixed behaviour); pass ``False`` for
            evaluation loaders where ordering should be stable.
        transforms: Optional callable forwarded to ``AudioDataset`` and
            applied to every sample tensor.

    Returns:
        The configured ``DataLoader``.
    """
    dataset = AudioDataset(pkl_dir, transforms=transforms)
    return DataLoader(
        dataset,
        shuffle=shuffle,
        batch_size=batch_size,
        num_workers=num_workers,
    )

In [6]:
# Training and validation loaders: batches of 16 samples, 8 worker processes each.
train_loader = fetch_dataloader(
    "data/training128mel1.pkl",
    batch_size=16,
    num_workers=8,
)
val_loader = fetch_dataloader(
    "data/validation128mel1.pkl",
    batch_size=16,
    num_workers=8,
)

In [31]:
def train_epoch(model, device, data_loader, optimizer, loss_fn):
    """Run one optimisation pass over ``data_loader``.

    Args:
        model: Module to train; switched to training mode here.
        device: Device the batches are moved to before the forward pass.
        data_loader: Sized iterable of batches; element 0 of each batch is
            the input tensor and element 1 the targets, whose axis 1 is
            squeezed away before the loss is computed.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable ``loss_fn(outputs, target)`` returning a scalar
            tensor.

    Returns:
        The mean of the per-batch loss values, or NaN when the loader yields
        no batches (previously this case raised ``ZeroDivisionError``).
    """
    model.train()
    loss_total, cnt = 0, 0

    with tqdm(total=len(data_loader)) as t:
        for data in data_loader:
            inputs = data[0].to(device)
            target = data[1].squeeze(1).to(device)

            outputs = model(inputs)

            loss = loss_fn(outputs, target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_total += loss.item()
            cnt += 1

            # Show the running mean loss next to the progress bar.
            t.set_postfix(loss='{:05.3f}'.format(loss_total / cnt))
            t.update()

    if cnt == 0:
        # No batches were seen, so a mean loss is undefined.
        return float("nan")
    return loss_total / cnt

def validate(model, device, test_loader):
    """Compute top-1 accuracy (in percent) of ``model`` over ``test_loader``.

    The model is switched to evaluation mode and is left in that mode on
    return; gradients are disabled for the duration of the pass.

    Args:
        model: Module to evaluate; its output is arg-maxed over dimension 1.
        device: Device the batches are moved to.
        test_loader: Iterable of batches; element 0 of each batch is the
            input tensor and element 1 the targets, whose axis 1 is squeezed
            away before comparison.

    Returns:
        ``100 * correct / total`` as a float, or ``0.0`` when the loader
        yields no samples (previously this case raised
        ``ZeroDivisionError``).
    """
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs = data[0].to(device)
            target = data[1].squeeze(1).to(device)

            # Predicted class = index of the largest score along dim 1.
            predicted = model(inputs).argmax(dim=1)

            total += target.size(0)
            correct += (predicted == target).sum().item()

    if total == 0:
        return 0.0
    return 100 * correct / total

def train(model, device, train_loader, val_loader, optimizer, loss_fn, scheduler=None, epochs=10, model_name="network"):
    """Train ``model`` for ``epochs`` epochs, validating and checkpointing each one.

    After every epoch the loss and validation accuracy are printed, the
    optional scheduler is stepped, and the whole model object is pickled to
    ``models/{model_name}{epoch}.pkl`` (``epoch`` is zero-based).

    Args:
        model: Module to train.
        device: Device passed through to ``train_epoch`` and ``validate``.
        train_loader: Loader passed to ``train_epoch``.
        val_loader: Loader passed to ``validate``.
        optimizer: Optimizer passed to ``train_epoch``.
        loss_fn: Loss callable passed to ``train_epoch``.
        scheduler: Optional learning-rate scheduler, stepped once per epoch.
        epochs: Number of epochs to run.
        model_name: Filename prefix of the per-epoch checkpoints.
    """
    best_acc = 0.

    # Create the checkpoint directory up front so the first save cannot fail
    # with FileNotFoundError on a fresh checkout.
    checkpoint_dir = os.path.dirname(f"models/{model_name}")
    os.makedirs(checkpoint_dir, exist_ok=True)

    for epoch in range(epochs):
        loss = train_epoch(model, device, train_loader, optimizer, loss_fn)
        acc = validate(model, device, val_loader)
        print(f"Epoch {epoch}/{epochs} Loss: {loss} Valid Acc: {acc}")

        best_acc = max(best_acc, acc)
        if scheduler is not None:
            scheduler.step()

        # The full module object is pickled (not just its weights), so the
        # class definition must be importable wherever the file is loaded.
        with open(f"models/{model_name}{epoch}.pkl", 'wb') as f:
            pickle.dump(model, f)

    print(f"Training finished.\nBest accuracy: {best_acc}")

In [32]:
class Inception(torch.nn.Module):
    """torchvision Inception v3 with its final layer replaced for this task.

    Args:
        dataset: Dataset name; ``"ESC"`` selects 50 output classes, any other
            value selects 10.
        pretrained: Forwarded to ``models.inception_v3``.
    """

    def __init__(self, dataset, pretrained=True):
        super().__init__()
        num_classes = 50 if dataset == "ESC" else 10
        # NOTE(review): newer torchvision releases appear to deprecate the
        # ``pretrained`` flag in favour of ``weights`` - confirm against the
        # installed version before upgrading.
        self.model = models.inception_v3(pretrained=pretrained, aux_logits=False)
        # Size the new classifier from the layer it replaces instead of
        # hard-coding the backbone's feature width.
        self.model.fc = torch.nn.Linear(self.model.fc.in_features, num_classes)

    def forward(self, x):
        """Return the wrapped model's output for ``x``."""
        return self.model(x)

In [33]:
# Everything in this run is placed on the CPU.
device = torch.device("cpu")

# Inception wrapper configured for GTZAN, with Adam and a step LR schedule
# (learning rate multiplied by 0.1 every 30 scheduler steps).
model = Inception(dataset="GTZAN", pretrained=True).to(device)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-4,
    weight_decay=1e-3,
)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

In [34]:
# Train for 10 epochs; checkpoints are written as models/inception<epoch>.pkl.
train(
    model,
    device,
    train_loader,
    val_loader,
    optimizer,
    loss_fn,
    scheduler=scheduler,
    epochs=10,
    model_name="inception",
)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [06:44<00:00,  8.08s/it, loss=1.846]


Epoch 0/3 Loss: 1.8463783824443818 Valid Acc: 69.0


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [09:01<00:00, 10.83s/it, loss=0.908]


Epoch 1/3 Loss: 0.908305613398552 Valid Acc: 79.5


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [09:37<00:00, 11.55s/it, loss=0.461]


Epoch 2/3 Loss: 0.46108962774276735 Valid Acc: 81.5


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [09:37<00:00, 11.54s/it, loss=0.243]


Epoch 3/3 Loss: 0.2433323010802269 Valid Acc: 84.0


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [09:21<00:00, 11.23s/it, loss=0.189]


Epoch 4/3 Loss: 0.18949869092553853 Valid Acc: 84.0


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [09:12<00:00, 11.04s/it, loss=0.136]


Epoch 5/3 Loss: 0.13611451912671327 Valid Acc: 86.5


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [09:17<00:00, 11.15s/it, loss=0.127]


Epoch 6/3 Loss: 0.12737614192068578 Valid Acc: 83.5


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [08:39<00:00, 10.40s/it, loss=0.124]


Epoch 7/3 Loss: 0.12419999621808529 Valid Acc: 84.5


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [08:03<00:00,  9.66s/it, loss=0.082]


Epoch 8/3 Loss: 0.08176276277750731 Valid Acc: 86.5


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [08:01<00:00,  9.64s/it, loss=0.067]


Epoch 9/3 Loss: 0.06722776734270156 Valid Acc: 87.0
Training finished.
Best accuracy: 87.0


In [35]:
# Reload the checkpoint written after the last epoch (index 9) and display it.
# pickle.load runs code stored in the file, so only load checkpoints you wrote.
with open("models/inception9.pkl", mode="rb") as checkpoint_file:
    best_model = pickle.load(checkpoint_file)

best_model

Inception(
  (model): Inception3(
    (Conv2d_1a_3x3): BasicConv2d(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (Conv2d_2a_3x3): BasicConv2d(
      (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (Conv2d_2b_3x3): BasicConv2d(
      (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (Conv2d_3b_1x1): BasicConv2d(
      (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (Conv2d_4a_3x3): BasicCo

Yay, we can definitely compete with something like this...

## My netwurk.

In [36]:
class MyNetwurk(torch.nn.Module):
    """Small CNN: three conv/batch-norm/ReLU/max-pool stages and a 3-layer MLP head.

    Args:
        input_size: ``(channels, height, width)`` of one input sample. Only
            the channel count is used to build the layers; the spatial size
            may vary because the conv features are adaptively pooled to a
            fixed grid before the classifier.
        num_classes: Number of output scores.
    """

    # Spatial grid the conv features are pooled to before flattening; with
    # 64 output channels this yields the 64 * 2 * 2 = 256 features that the
    # first linear layer expects.
    _POOLED_HW = (2, 2)

    def __init__(self, input_size, num_classes):
        super().__init__()
        c, _, _ = input_size

        self.convlayers = torch.nn.Sequential(
            torch.nn.Conv2d(c, 6, (3, 3)),
            torch.nn.BatchNorm2d(6),
            torch.nn.ReLU(inplace=True),
            torch.nn.MaxPool2d((2, 2), stride=2),

            torch.nn.Conv2d(6, 16, (3, 3)),
            torch.nn.BatchNorm2d(16),
            torch.nn.ReLU(inplace=True),
            torch.nn.MaxPool2d((2, 2), stride=2),

            torch.nn.Conv2d(16, 64, (3, 3)),
            torch.nn.BatchNorm2d(64),
            torch.nn.ReLU(inplace=True),
            torch.nn.MaxPool2d((2, 2), stride=2),
        )

        self.fc = torch.nn.Sequential(
            torch.nn.Linear(256, 120),
            torch.nn.BatchNorm1d(120),
            torch.nn.ReLU(inplace=True),

            torch.nn.Linear(120, 60),
            torch.nn.BatchNorm1d(60),
            torch.nn.ReLU(inplace=True),

            torch.nn.Linear(60, num_classes),
        )

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)`` for ``x``."""
        x = self.convlayers(x)
        # Without this, only inputs whose conv output is exactly 2x2 match the
        # 256-feature linear layer. For such inputs the pooling is an exact
        # identity, so previously working inputs give unchanged results.
        x = torch.nn.functional.adaptive_avg_pool2d(x, self._POOLED_HW)
        x = x.flatten(start_dim=1)
        x = self.fc(x)
        return x

In [37]:
# Instantiate the custom network for 3-channel input and 10 output classes.
net = MyNetwurk(input_size=(3, 1234, 1234), num_classes=10)

In [42]:
# Serialise the freshly built network to disk with pickle.
with open("test.pkl", mode='wb') as out_file:
    pickle.dump(net, out_file)

In [43]:
# Read the pickled network back and display it.
with open("test.pkl", mode='rb') as in_file:
    obj = pickle.load(in_file)

obj

MyNetwurk(
  (convlayers): Sequential(
    (0): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
    (5): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1))
    (9): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=256, out_features=120, bias=True)
    (1): BatchNorm1d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stat