[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/kovacsdotgergo/szakdolgozat/blob/feature%2Fcolab_bringup/esc_notebook.ipynb)

In [None]:
# Fetch the project sources and switch the working directory to the checkout
# (presumably so the later `import utils` / `import cnn` cells resolve against
# the repository -- TODO confirm).
!git clone https://github.com/kovacsdotgergo/szakdolgozat.git
%cd szakdolgozat
# Install the Python dependencies. Only timm is pinned to an exact version
# (presumably required by the model code in src.models -- TODO confirm).
!pip install wget torch torchvision torchaudio matplotlib pandas numpy timm==0.4.5

In [None]:
# TODO: temporary -- switches the checkout to the feature/colab_bringup branch.
# `git branch` is run before and after the checkout to show which branch is
# active at each point.
!git branch
!git checkout feature/colab_bringup
!git branch

In [None]:
import utils
# setup_env() returns four values. As used by the cells below:
#   esc_path       - first positional argument of esc_dataset.ESCdataset
#   save_path      - prefix the model checkpoint file names are appended to
#   workspace_path - not used by any cell visible here
#   have_cuda      - truthy selects the "cuda:0" device, otherwise 'cpu'
esc_path, save_path, workspace_path, have_cuda = utils.setup_env()

In [None]:
from src.models import ASTModel
import torch

## Model: AST transformer
# Hyperparameters and bookkeeping values read by the common training cell.
INPUT_TDIM = 512
target_len = INPUT_TDIM  # dataset target_len is tied to the model's input_tdim
train_epochs = 20
lr = 5e-6
model_save_path = save_path + '/transformer.pth'
train_proc_title = f'transformer {train_epochs} epoch training'

# Build the model, wrap it in DataParallel restricted to device 0, then move
# it to the GPU when have_cuda is truthy and to the CPU otherwise.
audio_model = torch.nn.DataParallel(
    ASTModel(label_dim=50, input_tdim=INPUT_TDIM,
             imagenet_pretrain=True, audioset_pretrain=True),
    device_ids=[0],
)
audio_model = audio_model.to(torch.device("cuda:0" if have_cuda else 'cpu'))

In [None]:
import cnn
import torch

## Model: 2D CNN
# Hyperparameters and bookkeeping values read by the common training cell.
target_len = 512
train_epochs = 80
lr = 0.0009
model_save_path = save_path + '/cnn2d_v1.pth'
train_proc_title = f'CNN {train_epochs} epoch training'

# Build the model, wrap it in DataParallel restricted to device 0, then move
# it to the GPU when have_cuda is truthy and to the CPU otherwise.
audio_model = torch.nn.DataParallel(cnn.conv2d_v1(), device_ids=[0])
audio_model = audio_model.to(torch.device("cuda:0" if have_cuda else 'cpu'))


In [None]:
# Imported here, like in the other model cells, so that this cell does not
# depend on the imports of a later cell having been executed first.
import torch
import torch.nn as nn

#for saving the best model
# NOTE(review): Kaggle-specific location; the LSTM setup further down in this
# cell builds `model_save_path` from `save_path` instead -- confirm whether
# this constant is still needed.
SAVE_PATH = '/kaggle/working/lstm_1lay_mel.pth'

class LSTM_mel(nn.Module):
  """LSTM classifier: the last layer's final hidden state feeds a linear layer.

  Args:
    input_size: number of features per sequence step (passed to nn.LSTM).
    hidden_size: size of the LSTM hidden state.
    num_layers: number of stacked LSTM layers.
    output_size: number of outputs of the final linear layer.
  """

  def __init__(self, input_size, hidden_size, num_layers, output_size):
    super().__init__()
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.lstm = nn.LSTM(input_size=input_size, hidden_size=self.hidden_size,
                        num_layers=self.num_layers, batch_first=True)
    self.fc = nn.Linear(self.hidden_size, output_size)

  def forward(self, x):
    """
    x: (batch, sequence, input_size) -- the LSTM is batch_first and no
       transpose is applied here, so the last axis must hold the features.
    returns: (batch, output_size)
    """
    # Omitting the initial states makes nn.LSTM start from zeros created to
    # match the input, so no global device flag is needed and the module also
    # works when the input lives on a different device than the default.
    _, (h_n, _) = self.lstm(x)
    # h_n is (num_layers, batch, hidden_size). Only the top layer's state is
    # classified; flattening all layers together would mix the layer axis into
    # the batch axis whenever num_layers > 1.
    return self.fc(h_n[-1])

# LSTM dimensions. INPUT_SIZE equals the n_mels value the dataset is built
# with; OUTPUT_SIZE equals the label_dim used for the transformer model.
INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE = 128, 64, 1, 50

# Hyperparameters and bookkeeping values read by the common training cell.
target_len = 512
train_epochs = 5
lr = 0.001
model_save_path = save_path + '/lstm.pth'
train_proc_title = f'LSTM {train_epochs} epoch training'

# Build the model, wrap it in DataParallel restricted to device 0, then move
# it to the GPU when have_cuda is truthy and to the CPU otherwise.
audio_model = torch.nn.DataParallel(
    LSTM_mel(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE),
    device_ids=[0],
)
audio_model = audio_model.to(torch.device("cuda:0" if have_cuda else 'cpu'))

## Dataset
# Imported here so this cell does not fail with a NameError when it is run
# before the cell that holds the notebook's other `import esc_dataset`.
import esc_dataset
# NOTE(review): the common training cell builds `dataset` again (with
# hop_length=256 and target_len=target_len), replacing this object -- confirm
# whether this hop_length=512 / target_len=None variant is still wanted.
dataset = esc_dataset.ESCdataset(esc_path, n_fft=1024, hop_length=512,
                     n_mels=128, augment=False,  log_mel=True,
                     use_kaldi=True, target_len=None, resample_rate=22500)

In [None]:
import torch
import torch.nn as nn
import esc_dataset
import trainer
import numpy as np

## Dataset
dataset = esc_dataset.ESCdataset(esc_path, n_fft=1024, hop_length=256,
                     n_mels=128, augment=False,  log_mel=True,
                     use_kaldi=True, target_len=target_len, resample_rate=22500)

#dividing the dataset randomly, 80% train, 10% validation, 10% test
# The split is seeded so that it is identical on every run: a checkpoint that
# is loaded in a fresh session is then tested on the same held-out items it
# never saw during training, instead of on a new random subset.
SPLIT_SEED = 0
numtrain = int(0.8*len(dataset))
numval = (len(dataset) - numtrain) // 2
# the test set takes the remainder, so the three sizes always sum to len(dataset)
numtest = len(dataset) - numtrain - numval
split_dataset = torch.utils.data.random_split(
    dataset, [numtrain, numval, numtest],
    generator=torch.Generator().manual_seed(SPLIT_SEED))
#using augment on the training data
# NOTE(review): random_split returns Subset wrappers around the one shared
# `dataset`, so the assignment below would set an attribute on the wrapper
# only -- confirm how augmentation is meant to be enabled before re-enabling.
#split_dataset[0].augment = True

## DataLoader
BATCHSIZE = 16
trainloader = torch.utils.data.DataLoader(split_dataset[0], batch_size=BATCHSIZE,
                         shuffle=True, num_workers=2)
valloader = torch.utils.data.DataLoader(split_dataset[1], batch_size=BATCHSIZE, shuffle=True)
testloader = torch.utils.data.DataLoader(split_dataset[2], batch_size=BATCHSIZE, shuffle=True)

## Trainer
# From here on the name `trainer` refers to the Trainer instance rather than
# the imported module. The loss is handed over as the CrossEntropyLoss class
# itself, not as an instance.
trainer = trainer.Trainer(audio_model, have_cuda, criterion=nn.CrossEntropyLoss)

## Inference
# Sanity check before training: classify the first dataset item and print the
# predicted class name next to the true one.
spect, label = dataset[0]
predicted_name = dataset.get_class_name(trainer.inference(spect, ret_index=True).item())
true_name = dataset.get_class_name(label)
print(f'trainer inference: {predicted_name}, '
    f'true label: {true_name}')

## Training
# Learning-rate sweep over 5 log-spaced candidates from 1e-1 down to 1e-6.
# NOTE(review): 5 points over five decades are 1.25 decades apart; num=6 would
# give one candidate per decade -- confirm which was intended.
lrs = np.logspace(-1, -6, num=5)
# The sweep uses a literal train_epochs=5, not the per-model `train_epochs`
# variable set in the model cell.
params = trainer.hyperparameter_plotting(lrs, trainloader, valloader, train_epochs=5)
print(params)
# The full workflow below (train, plot, test, re-run inference) is currently
# disabled; it uses the per-model lr, train_epochs, model_save_path and
# train_proc_title values.
# trainer.train(trainloader, valloader, optimizer=torch.optim.AdamW, train_epochs=train_epochs,
#               val_interval=25, lr=lr, save_best_model=True, save_path=model_save_path)
# trainer.plot_train_proc(train_proc_title)

# ## Test
# trainer.load_model(model_save_path)
# print(f'test accuracy: {trainer.test(testloader)}')

# ## Inference after training
# print(f'Trainer inference after training: {dataset.get_class_name(trainer.inference(spect, ret_index=True).item())}, '
#     f'true label: {dataset.get_class_name(label)}')