# Notebook para treinar o SpectroViT

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm
import random

import numpy as np

# Seed every RNG this notebook can draw from, not only Python's `random`.
# PyTorch's generator drives weight initialisation and DataLoader shuffling,
# and NumPy's global generator is seeded too in case the dataset code uses it;
# leaving either unseeded makes runs non-reproducible.
SEED = 5
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
import matplotlib.pyplot as plt
import numpy as np
from scipy import signal,stats
import os

In [2]:
from datasets import DatasetSpgramSyntheticData
from models import SpectroViT
from losses import RangeMAELoss
from lr_scheduler import CustomLRScheduler
from save_models import SaveBestModel, SaveCurrentModel
from main_functions_adapted import valid_on_the_fly, run_train_epoch, run_validation
from main import calculate_parameters
from utils import clean_directory

Using cuda:0


In [3]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

### Dados

In [4]:
# Training and validation splits are both read from the same HDF5 file and
# differ only in their start/end bounds and in whether augmentation is on.
# NOTE(review): the exact meaning of start/end (inclusive vs. exclusive, and
# what unit they index) is defined inside DatasetSpgramSyntheticData - confirm
# that 0-84 and 84-108 do not share a sample.
dataset_train = DatasetSpgramSyntheticData(
    path_data='../sample_data.h5',
    start=0,
    end=84,
    augment=True,  # augmentation enabled for training only
)
dataset_validation = DatasetSpgramSyntheticData(
    path_data='../sample_data.h5',
    start=84,
    end=108,
    augment=False,
)

### Modelo

In [5]:
# Build the model with its default constructor arguments, then move it to the
# selected device.
spectrovit = SpectroViT()
spectrovit = spectrovit.to(device)

### Loss e Optimizer

In [6]:
# Training criterion (project-defined loss, default arguments).
loss = RangeMAELoss()

# Adam over all model parameters, starting at a learning rate of 1e-4.
optimizer = torch.optim.Adam(
    spectrovit.parameters(),
    lr=1e-4,
)

# Project wrapper around a learning-rate scheduler, selected here by the
# 'cosineannealinglr' key with T_max=10 and eta_min=1e-6.
# NOTE(review): presumably forwards T_max/eta_min to
# torch.optim.lr_scheduler.CosineAnnealingLR - confirm in lr_scheduler.py.
lr_scheduler = CustomLRScheduler(
    optimizer,
    'cosineannealinglr',
    T_max = 10,
    eta_min = 1e-6,
)

### Loop de treino e validação

In [7]:
# --- Run configuration -------------------------------------------------------

# Number of epochs and the batch size used by each DataLoader.
n_epochs = 5
batch_size_train = batch_size_validation = 1

# Intended checkpoint cadence, in epochs.
step_for_saving = 5

# Output directory shared by the model savers and the validation helper.
# NOTE(review): 'firs_test' looks like a typo for 'first_test'; the path is
# kept unchanged so that existing output locations still resolve.
save_dir_path = '../firs_test/'

# Names handed to the validation helper (`filename`) and to the model savers
# (`name_model`).
filename = name_model = 'first_model'

# Savers writing into `save_dir_path`; both are called from the training loop.
save_best_model = SaveBestModel(dir_model=save_dir_path)
save_current_model = SaveCurrentModel(dir_model=save_dir_path)

In [8]:
# Wrap both datasets in DataLoaders; each one reshuffles its samples on every
# pass.
# NOTE(review): shuffling the validation loader does not change averaged
# metrics, but it makes per-batch outputs differ between epochs - confirm
# this is intended before switching it to shuffle=False.
dataloader_train = DataLoader(
    dataset_train,
    batch_size=batch_size_train,
    shuffle=True,
)
dataloader_validation = DataLoader(
    dataset_validation,
    batch_size=batch_size_validation,
    shuffle=True,
)

In [9]:
# --- Training / validation driver --------------------------------------------
# For each epoch: train, validate, record the metrics, adjust the learning
# rate, then save models and validation output on a fixed cadence.

# Per-epoch history; each list receives exactly one entry per epoch.
train_loss_list = []
val_loss_list = []
val_mean_mse_list = []
val_mean_snr_list = []
val_mean_linewidth_list = []
val_mean_shape_score_list = []
score_challenge_list = []

# Learning-rate policy: at `lr_drop_epoch` the rate is set by hand to
# `lr_after_drop`; on every later epoch the scheduler is stepped instead.
# Neither branch runs when n_epochs <= lr_drop_epoch.
lr_drop_epoch = 20
lr_after_drop = 1e-5

# How often (in epochs) valid_on_the_fly is called.
validation_plot_every = 3

# Make sure the output directory exists before cleaning it.
# NOTE(review): clean_directory presumably deletes the directory's previous
# contents - confirm before pointing save_dir_path at a folder with results
# worth keeping.
os.makedirs(save_dir_path, exist_ok=True)
clean_directory(save_dir_path)

for epoch in range(n_epochs):

  # NOTE(review): called once per epoch; the captured output suggests it
  # reports the parameter count, which would not change between epochs.
  calculate_parameters(spectrovit)

  train_loss = run_train_epoch(
    model=spectrovit,
    optimizer=optimizer,
    criterion=loss,
    loader=dataloader_train,
    epoch=epoch,
    device=device,
  )
  (val_loss,
   loader_mean_mse,
   loader_mean_snr,
   loader_mean_linewidth,
   loader_mean_shape_score,
   score_challenge) = run_validation(
    model=spectrovit,
    criterion=loss,
    loader=dataloader_validation,
    epoch=epoch,
    device=device,
  )

  train_loss_list.append(train_loss)
  val_loss_list.append(val_loss)
  val_mean_mse_list.append(loader_mean_mse)
  val_mean_snr_list.append(loader_mean_snr)
  val_mean_linewidth_list.append(loader_mean_linewidth)
  val_mean_shape_score_list.append(loader_mean_shape_score)
  score_challenge_list.append(score_challenge)

  if epoch == lr_drop_epoch:
    # One-off manual drop applied to every parameter group.
    for param_group in optimizer.param_groups:
      param_group['lr'] = lr_after_drop
  elif epoch > lr_drop_epoch:
    lr_scheduler.step()
    print("Current learning rate:",lr_scheduler.scheduler.get_last_lr()[0])

  # The best-model saver is called every epoch with the challenge score.
  save_best_model(current_valid_score=score_challenge, model=spectrovit, name_model=name_model)
  if epoch % validation_plot_every == 0:
    valid_on_the_fly(model=spectrovit, epoch=epoch, val_dataset=dataloader_validation, save_dir_path=save_dir_path, filename=filename, device=device)
  # Honour the configured cadence instead of a hard-coded 5.
  if epoch % step_for_saving == 0:
    save_current_model(current_valid_score=score_challenge, model=spectrovit, name_model=name_model)


Number of parameters: 90473472


Train Loop:   0%|          | 0/16800 [00:00<?, ?it/s, desc=[epoch: 1], iteration: 0/16800, loss: 0.08865505456924438]


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 1.95 GiB total capacity; 1.39 GiB already allocated; 18.31 MiB free; 1.54 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF