# Notebook para treinar o SpectroViT

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm
import random
random.seed(5)
import matplotlib.pyplot as plt
import numpy as np
from scipy import signal,stats
import os

In [2]:
from datasets import DatasetSpgramSyntheticData
from models import SpectroViT
from losses import RangeMAELoss
from lr_scheduler import CustomLRScheduler
from save_models import SaveBestModel, SaveCurrentModel
from main_functions_adapted import valid_on_the_fly, run_train_epoch, run_validation
from main import calculate_parameters
from utils import clean_directory

Using cuda:0


In [3]:
# Seed the torch and NumPy global RNGs as well: the import cell only seeds Python's
# `random` module, which leaves torch-driven randomness (e.g. DataLoader shuffling)
# and any NumPy-based randomness different on every run.
SEED = 5  # same value the import cell passes to random.seed
torch.manual_seed(SEED)
np.random.seed(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Dados

In [4]:
# Spectrogram settings forwarded to DatasetSpgramSyntheticData in the next cell.
hop_size = 16
window_size = 256
# Symmetric Hann window; its length is derived from `window_size` (instead of a second
# hard-coded 256) so the window and the declared window size cannot drift apart.
window = signal.windows.hann(window_size, sym=True)

In [5]:
# Arguments shared by both splits: same HDF5 file and same spectrogram settings.
_spgram_kwargs = dict(
    path_data='../sample_data.h5',
    hop_size=hop_size,
    window_size=window_size,
    window=window,
)

# Training split: start=0, end=84, with augmentation enabled.
dataset_train = DatasetSpgramSyntheticData(start=0, end=84, augment=True, **_spgram_kwargs)

# Validation split: start=84, end=108, with augmentation disabled.
dataset_validation = DatasetSpgramSyntheticData(start=84, end=108, augment=False, **_spgram_kwargs)

### Modelo

In [6]:
# Build SpectroViT with its default constructor arguments, then place it on `device`.
spectrovit = SpectroViT()
spectrovit = spectrovit.to(device)

### Loss e Optimizer

In [7]:
# Project-defined training criterion (see losses.RangeMAELoss).
loss = RangeMAELoss()

# Adam over every model parameter, starting from a learning rate of 1e-4.
optimizer = torch.optim.Adam(spectrovit.parameters(), lr=1e-4)

# Options forwarded to the project's scheduler wrapper for the 'cosineannealinglr' policy.
_cosine_options = dict(T_max=10, eta_min=1e-6)
lr_scheduler = CustomLRScheduler(optimizer, 'cosineannealinglr', **_cosine_options)

### Loop de treino e validação

In [8]:
# --- Run length and batch sizes ---
n_epochs = 30
batch_size_train = 100
batch_size_validation = 6

# --- Periodicity (in epochs) of the side tasks done by the training loop ---
step_for_saving_plots = 3   # how often valid_on_the_fly is called
step_for_safe_saving = 5    # how often save_current_model is called

# Epoch index at which the training loop changes its learning-rate policy.
epoch_to_switch_to_cosine = 20

# --- Output location and naming ---
save_dir_path = '../model_hop_16_mfft_256_zp/'
name_model = 'model_hop_16_mfft_256_zp'
filename = name_model  # plots and checkpoints share the same base name

# Checkpoint helpers, both writing into `save_dir_path`.
save_best_model = SaveBestModel(dir_model=save_dir_path)
save_current_model = SaveCurrentModel(dir_model=save_dir_path)

In [9]:
# Training batches are reshuffled every epoch.
dataloader_train = DataLoader(dataset_train, batch_size=batch_size_train, shuffle=True)
# Validation is iterated in a fixed order: shuffling brings nothing to evaluation and
# makes the per-batch validation logs non-repeatable from one epoch/run to the next.
dataloader_validation = DataLoader(dataset_validation, batch_size=batch_size_validation, shuffle=False)

In [10]:
# Per-epoch history: each list receives exactly one entry per epoch.
train_loss_list = []
val_loss_list = []
val_mean_mse_list = []
val_mean_snr_list = []
val_mean_linewidth_list = []
val_mean_shape_score_list = []
score_challenge_list = []

# Make sure the output directory exists, then hand it to clean_directory
# (presumably empties it of artefacts from earlier runs — confirm in utils).
os.makedirs(save_dir_path, exist_ok=True)
clean_directory(save_dir_path)

# Loop-invariant: the model does not change size between epochs, so report it once
# instead of repeating the same call (and the same log line) every epoch.
calculate_parameters(spectrovit)

last_epoch = n_epochs - 1

for epoch in range(n_epochs):

  # One pass over the training data, then one pass over the validation data.
  train_loss = run_train_epoch(model=spectrovit, optimizer=optimizer, criterion=loss, loader=dataloader_train, epoch=epoch, device=device)
  val_loss, loader_mean_mse, loader_mean_snr,loader_mean_linewidth,loader_mean_shape_score,score_challenge = run_validation(model=spectrovit, criterion=loss, loader=dataloader_validation, epoch=epoch, device=device)

  train_loss_list.append(train_loss)
  val_loss_list.append(val_loss)
  val_mean_mse_list.append(loader_mean_mse)
  val_mean_snr_list.append(loader_mean_snr)
  val_mean_linewidth_list.append(loader_mean_linewidth)
  val_mean_shape_score_list.append(loader_mean_shape_score)
  score_challenge_list.append(score_challenge)

  # Learning-rate policy: untouched before the switch epoch; at the switch epoch the
  # rate is forced to 1e-5; on every later epoch the scheduler is stepped once.
  if epoch == epoch_to_switch_to_cosine:
    for param_group in optimizer.param_groups:
      param_group['lr'] = 1e-5
  elif epoch > epoch_to_switch_to_cosine:
    lr_scheduler.step()
    print("Current learning rate:",lr_scheduler.scheduler.get_last_lr()[0])

  # Offered every epoch; SaveBestModel receives the score and decides what to keep.
  save_best_model(current_valid_score=score_challenge, model=spectrovit, name_model=name_model)
  if epoch%step_for_saving_plots == 0:
    valid_on_the_fly(model=spectrovit, epoch=epoch, val_dataset=dataset_validation, save_dir_path=save_dir_path, filename=filename, device=device)
  # Periodic safety checkpoint. The final epoch is always included: n_epochs - 1 is not
  # necessarily a multiple of the step, and without this the last epochs of training
  # would never be written by save_current_model.
  if epoch%step_for_safe_saving == 0 or epoch == last_epoch:
    save_current_model(current_valid_score=score_challenge, model=spectrovit, name_model=name_model)


Number of parameters: 90473472


Train Loop:   0%|          | 0/168 [00:00<?, ?it/s]

Generating Spectrograms of size:  (177, 124)
Zero padded to shape:  (1, 224, 224)


Train Loop: 100%|██████████| 168/168 [07:51<00:00,  2.81s/it, desc=[epoch: 1], iteration: 167/168, loss: 0.006398246673467968] 
Validation Loop:  25%|██▌       | 1/4 [00:00<00:00,  5.77it/s, desc=[Epoch 1] Loss: 0.0022447709925472736 | MSE:0.0002907 | SNR:48.2988627 | FWHM:0.0764546 | Shape Score:0.9993123]

Generating Spectrograms of size:  (177, 124)
Zero padded to shape:  (1, 224, 224)


Validation Loop: 100%|██████████| 4/4 [00:00<00:00,  5.68it/s, desc=[Epoch 1] Loss: 0.0023752516135573387 | MSE:0.0003014 | SNR:42.0101368 | FWHM:0.0764546 | Shape Score:0.9992269]


Best validation score: 0.2003958827443011
Saving current model with score: 0.2003958827443011


In [11]:
# Write each per-epoch history collected by the training loop to its own text file
# inside `save_dir_path`. Keys are the output file names, values the lists to dump.
_histories_by_file = {
    'train_loss_list.txt': train_loss_list,
    'val_loss_list.txt': val_loss_list,
    'val_mse_list.txt': val_mean_mse_list,
    'val_snr_list.txt': val_mean_snr_list,
    'val_linewidth_list.txt': val_mean_linewidth_list,
    'val_mean_shape_score_list.txt': val_mean_shape_score_list,
    'score_challenge_list.txt': score_challenge_list,
}

for _txt_name, _history in _histories_by_file.items():
    np.savetxt(save_dir_path + _txt_name, np.array(_history), delimiter='\n')