# MVP model fine-tune analysis using mixed windows for guessing the validation test

In [51]:
# Input parameters
model_patch_size = 8
verbose          = 0
reset_kernel     = False

In [52]:
#! pip install --no-deps ydata_profiling
#! pip install --no-deps dacite
#! pip install --no-deps multimethod
#! pip install --no-deps visions
#! pip install --no-deps wordcloud
#! pip install --no-deps imagehash
#! pip install --no-deps htmlmin

In [53]:
import torch
torch.cuda.set_device(0)
#torch.cuda.set_device(1)

In [54]:
# Imports
from dvats.all import *
from tsai.data.preparation import SlidingWindow
from fastcore.all import *
import wandb
wandb_api = wandb.Api()
from yaml import load, FullLoader
import dvats.utils as ut
from dvats.imports import beep
import pandas as pdz
from copy import deepcopy
import matplotlib.pyplot as plt
import ydata_profiling as ydp
import seaborn as sns

In [55]:
# User and project
entity = os.environ.get("WANDB_ENTITY")
project = os.environ.get("WANDB_PROJECT")
folder = entity+'/'+project+'/'

# Dataset
dataset = 'gtrends_kohls'
dataset_version = 'v2'

#dataset = 'S1'
#dataset_version = 'v8'

enc_artifact_dataset = folder + dataset + ':' + dataset_version

enc_artifact_name = "mvp:v194"

In [56]:
print("Getting dataset artifact: ", enc_artifact_dataset)
df_artifact = wandb_api.artifact(enc_artifact_dataset, type = 'dataset')

Getting dataset artifact:  mi-santamaria/deepvats/gtrends_kohls:v2


In [57]:
print(df_artifact.name)
df = df_artifact.to_df()
display(df.head())
df.shape

gtrends_kohls:v2


[34m[1mwandb[0m:   1 of 1 files downloaded.  


Unnamed: 0,volume
2004-01-01,0.010417
2004-01-08,0.010417
2004-01-15,0.010417
2004-01-22,0.0
2004-01-29,0.0


(440, 1)

In [58]:
enc_input = df

In [59]:
import dvats.config as cfg_
user, project, version, data, config, job_type = cfg_.get_artifact_config_MVP(False)

In [60]:
n_epochs_list     = [5, 10, 20]
dataset_percents  = [0.25, 0.5, 0.75, 1] #1 No tendría sentido porque sería como hacer lo mismo que con mvp. entrenar con todo el dataset.
masked_percents = [0.25, 0.5, 0.75]
sizes             = [1, 5]
total_cases = len(n_epochs_list)*len(dataset_percents)*len(masked_percents)*len(sizes)
print(f"Total cases: {total_cases}")

Total cases: 72


In [61]:
file_results_small = 'results_mvp_1'
file_errors_small = 'errors_mvp_1'
file_errors_small = file_errors_small+dataset+version+".csv"
file_results_small = file_results_small+dataset+version+".csv"

In [62]:
from dvats.encoder import _set_enc_input

## Preparing the DataLoaders

In [68]:
from fastai.data.core import DataLoaders
from torch.utils.data import Dataset, DataLoader

In [65]:
enc_input.batch_size

16

In [64]:
mssg = ut.Mssg(verbose=8)
enc_input, window_sizes = _set_enc_input(
    mssg = mssg, 
    X = df, 
    stride = 1,
    batch_size = 16,
    validation_percent = 0.3,
    training_percent = 0.7,
    window_mask_percent = 0.75,
    window_sizes = [17],
    n_window_sizes = 1,
    full_dataset = True,
    mix_windows = True,
    cpu = False
)

[8] [ --> _set_enc_input ]
[8] [91m [ _set_enc_input ] is none enc_input? True[0m
[8] [91m [ _set_enc_input ] About to get the windows[0m
[8] [ --> windowed_dataset ]
Initialize Windowed Dataset
[8]  [ _set_enc_input ] Train size: 308
[8]  [ _set_enc_input ] Valid size: 8
[8]  [ _set_enc_input ] X is a DataFrame, X~(440, 1) | window_sizes 1, n_window_sizes 1
[8]  [ windowed_dataset ] X is a DataFrame | Window sizes: 1
[8]  [ windowed_dataset ] Building the windows
[8] [windowed_dataset --> ]
[8] [91m [ _set_enc_input ] About to get the encoder input[0m
[8] [_set_enc_input --> ]


In [86]:
for batch in enc_input.data.valid_batches():
    print(f"Shape: {batch.shape}")

Shape: torch.Size([16, 1, 17])
Shape: torch.Size([16, 1, 17])
Shape: torch.Size([16, 1, 17])
Shape: torch.Size([16, 1, 17])
Shape: torch.Size([16, 1, 17])
Shape: torch.Size([16, 1, 17])
Shape: torch.Size([16, 1, 17])
Shape: torch.Size([4, 1, 17])


In [151]:
def dl_shapes(dl):
    for i, batch in enumerate(dl):
        print(f"Batch {i+1}: {type(batch)}")
        if isinstance(batch, (list, tuple)):  # Para comprobar si es una tupla de (inputs, labels)
            print(f"  Input shape: {batch[0].shape}")
            if len(batch) > 1:
                print(f"  Target shape: {batch[1].shape}")
        else:
            print(f"  Shape: {batch.shape}")  # Si batch no es una tupla/lista
    return None

In [163]:
import torch
from torch.utils.data import Dataset

class IndexedDataset(Dataset):
    def __init__(self, batches):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # 🔹 Verificar si `batches` no está vacío
        if not batches:
            raise ValueError("⚠ Error: `batches` está vacío. No se puede crear un dataset sin datos.")

        # 🔹 Convertir los batches a `cuda` o `cpu`
        self.batches = [batch.to(self.device, dtype=torch.float32) for batch in batches]

        # 🔹 Verificar el primer batch
        print(f"\n📌 IndexedDataset creado con {len(self.batches)} batches")
        for i, batch in enumerate(self.batches):
            print(f"🔹 IndexedDataset Batch {i}: Shape {batch.shape}, Device {batch.device}")


        # 🔹 Verificar si el último batch es más pequeño y eliminarlo si es necesario
        if len(self.batches) > 1 and self.batches[-1].shape[0] < self.batches[0].shape[0]:
            print("⚠ Último batch más pequeño. Eliminando para evitar errores.")
            self.batches = self.batches[:-1]

        # 🔹 Confirmar si los tensores están en CUDA
        if self.batches[0].is_cuda:
            print("✅ Los tensores están correctamente en CUDA.")
        else:
            print("❌ ERROR: Los tensores no están en CUDA.")

    def __len__(self):
        return len(self.batches)

    def __getitem__(self, idx):
        batch = self.batches[idx]

        # 🔹 Solo devolver el batch (sin `None`)
        print(f"\n📌 Batch {idx} - Shape: {batch.shape}, Dispositivo: {batch.device}")
        return batch  # 🚀 Fastai espera solo el tensor, no una tupla


In [189]:
import torch
from torch.utils.data import Dataset

class FlattenedDataset(Dataset):
    def __init__(self, batches):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        if not batches:
            raise ValueError("⚠ Error: `batches` está vacío. No se puede crear un dataset sin datos.")

        self.batches = [batch.to(self.device, dtype=torch.float32) for batch in batches]

        if len(self.batches) > 1 and self.batches[-1].shape[0] < self.batches[0].shape[0]:
            print("⚠ Último batch más pequeño. Eliminando para evitar errores.")
            self.batches = self.batches[:-1]

        # 🔹 Aplanar manteniendo la estructura original
        self.samples = [(x,) for batch in self.batches for x in batch]  # 🔹 DEVOLVER TUPLAS

        if self.samples[0][0].is_cuda:
            print("✅ Los tensores están correctamente en CUDA.")
        else:
            print("❌ ERROR: Los tensores no están en CUDA.")

        print(f"\n📌 FlattenedDataset creado con {len(self.samples)} muestras")
        print(f"📌 Shape de una muestra: {self.samples[0][0].shape}")  

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        return self.samples[idx]  # 🔹 DEVOLVER TUPLAS DIRECTAMENTE


In [190]:
print("------------------------------------------ START INSPECTION ----------------------------------------")
print("--- Get batches ---")
# 🔹 Convertir el generador en una lista de tensores y asegurarse de que están en `cuda:0`
train_batches = [batch.to(device, dtype=torch.float32) for batch in enc_input.data.train_batches()]
valid_batches = [batch.to(device, dtype=torch.float32) for batch in enc_input.data.valid_batches()]
print("--- Check valid batches ...")
print(len(valid_batches))
print(valid_batches[0].shape)
print("--- Convert to IndexedDataset ---")
# 🔹 Crear datasets indexables con datos en CUDA/CPU según corresponda
#train_dataset = IndexedDataset(train_batches)
#valid_dataset = IndexedDataset(valid_batches)
train_dataset = FlattenedDataset(train_batches)
valid_dataset = FlattenedDataset(valid_batches)

# 🔹 Crear DataLoaders compatibles con fastai
print("---- Create dataloader ---")
train_loader = FastaiDataLoader(
    train_dataset, 
    bs=16,  
    shuffle=True#, 
    #collate_fn=lambda x: tuple(zip(*x))  # 🔹 Esto mantiene la estructura de tupla
)

valid_loader = FastaiDataLoader(
    valid_dataset, 
    bs=16,  
    shuffle=False#, 
    #collate_fn=lambda x: tuple(zip(*x))  # 🔹 Igual para validación
)
print("\n---- Verificando FastaiDataLoader ----")
print("---- Train---")
dl_shapes(train_loader)

print("---- Valid---")
dl_shapes(valid_loader)


print("---- Create dataloaders ---")
# 🔹 Crear el DataLoaders final en fastai
dls = DataLoaders(to_device(train_loader, device), to_device(valid_loader, device))
print("---- Dataloaders valid -----")
dl_shapes(dls.valid)
print("---- Dataloaders train -----")
dl_shapes(dls.train)
print("------------------------------------------ END OF INSPECTION ----------------------------------------")

------------------------------------------ START INSPECTION ----------------------------------------
--- Get batches ---
--- Check valid batches ...
8
torch.Size([16, 1, 17])
--- Convert to IndexedDataset ---
⚠ Último batch más pequeño. Eliminando para evitar errores.
✅ Los tensores están correctamente en CUDA.

📌 FlattenedDataset creado con 288 muestras
📌 Shape de una muestra: torch.Size([1, 1, 17])
⚠ Último batch más pequeño. Eliminando para evitar errores.
✅ Los tensores están correctamente en CUDA.

📌 FlattenedDataset creado con 112 muestras
📌 Shape de una muestra: torch.Size([1, 1, 17])
---- Create dataloader ---

---- Verificando FastaiDataLoader ----
---- Train---
Batch 1: <class 'tuple'>
  Input shape: torch.Size([16, 1, 1, 17])
Batch 2: <class 'tuple'>
  Input shape: torch.Size([16, 1, 1, 17])
Batch 3: <class 'tuple'>
  Input shape: torch.Size([16, 1, 1, 17])
Batch 4: <class 'tuple'>
  Input shape: torch.Size([16, 1, 1, 17])
Batch 5: <class 'tuple'>
  Input shape: torch.Size([

In [70]:
dls.valid

<torch.utils.data.dataloader.DataLoader at 0x7fa9944a2260>

## Validate model

In [111]:
enc_artifact = wandb_api.artifact(enc_artifact_name, type = 'learner')
enc = enc_artifact.to_obj()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [112]:
enc_bkup = deepcopy(enc)

In [188]:
import torch
from fastai.data.core import DataLoaders
from fastai.data.load import DataLoader as FastaiDataLoader
def validate_in_device():
    # 🔹 Obtener el dispositivo del modelo
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # 🔹 Mover explícitamente `enc.model` a CUDA, asegurando que no lo regrese a CPU en validación
    enc.model.to(device)
    for module in enc.model.modules():
        module.to(device)
    for name, param in enc.model.named_parameters():
        param.data = param.data.to(device, dtype=torch.float32)
    for name, buffer in enc.model.named_buffers():
        buffer.data = buffer.data.to(device, dtype=torch.float32)

    # 🔹 Verificar los pesos y buffers del modelo para asegurar que están en CUDA y dtype `torch.float32`
    print("\n📌 **Verificando pesos del modelo**")
    for name, param in enc.model.named_parameters():
        print(f"{name}: {param.device}, dtype={param.dtype}")

    print("\n📌 **Verificando buffers del modelo**")
    for name, buffer in enc.model.named_buffers():
        print(f"{name}: {buffer.device}, dtype={buffer.dtype}")

    # 🔹 Verificar los datos del DataLoader
    for xb in dls.valid:
        xb = xb[0] if isinstance(xb, tuple) else xb  # 🔹 Desempaquetar si es una tupla
        xb = xb.to(device, dtype=torch.float32)  # 🔹 Asegurar que el tensor está en CUDA con el tipo correcto
        print(f"\n📌 **Verificando los datos del DataLoader:** Tipo de tensor: {xb.dtype}, Dispositivo: {xb.device}")
        break  # Solo mostrar un batch para prueba

    # 🔹 Ejecutar validación asegurando que **todo** está en CUDA con `torch.float32`
    enc.model.to(device)  # 🚨 Asegurar que `enc.model` sigue en CUDA antes de validar
    with torch.no_grad():
        for xb in dls.valid:
            xb = xb[0] if isinstance(xb, tuple) else xb  # 🔹 Desempaquetar si es necesario
            xb = xb.to(device, dtype=torch.float32)  # 🔹 Mover a `cuda:0` antes de la validación
            print(f"\n📌 Pasando xb al modelo con tipo: {xb.dtype}, dispositivo: {xb.device}")

            result = enc.validate(1, dls.valid, None)  # 🚀 Ejecutar validación
            print("\n✅ **Validación completada correctamente:**", result)
#validate_in_device()

### Move to CPU

In [180]:
import torch
check = False
def move_to_cpu(model, dataloaders):
    """ Mueve el modelo y los datos en los DataLoaders a CPU sin asumir targets. """
    device = torch.device("cpu")
    
    # 🔹 Mover modelo a CPU
    model.to(device)
    
    # 🔹 Mover pesos y buffers explícitamente
    for name, param in model.named_parameters():
        param.data = param.data.to(device)
    for name, buffer in model.named_buffers():
        buffer.data = buffer.data.to(device)

    # 🔹 Mover los datos en los DataLoaders a CPU
    def to_cpu_batch(batch):
        if isinstance(batch, (list, tuple)):
            return [x.to(device) if isinstance(x, torch.Tensor) else x for x in batch]
        elif isinstance(batch, torch.Tensor):
            return batch.to(device)
        return batch

    # 🔹 Modificar DataLoader sin asumir `y`
    dataloaders.train = [to_cpu_batch(x) for x in dataloaders.train]
    dataloaders.valid = [to_cpu_batch(x) for x in dataloaders.valid]

    print("✅ Todo ha sido movido a CPU correctamente.")

    return model, dataloaders

# 🔹 Aplicar la función
enc.model, dls = move_to_cpu(enc.model, dls)
if check:
    # 🔹 Comprobar
    for name, param in enc.model.named_parameters():
        print(f"{name}: {param.device}, {param.dtype}")
    for xb in dls.valid:
        print(f"Tipo de tensor en DataLoader: {xb.device}, {xb.dtype}")
        break  # Solo imprimir una muestra

✅ Todo ha sido movido a CPU correctamente.


### Validate ensuring determinism

In [120]:
import random

In [124]:
? enc.validate

[0;31mSignature:[0m  [0menc[0m[0;34m.[0m[0mvalidate[0m[0;34m([0m[0mds_idx[0m[0;34m=[0m[0;36m1[0m[0;34m,[0m [0mdl[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mcbs[0m[0;34m=[0m[0;32mNone[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m Validate on `dl` with potential new `cbs`.
[0;31mFile:[0m      /usr/local/share/miniconda3/envs/env/lib/python3.10/site-packages/fastai/learner.py
[0;31mType:[0m      method

In [182]:
def mvp_validate_determinist(learn, idx=1, cbs=None, dl = None):
    """Ejecuta una validación determinista en CUBLAS y restaura el modelo después."""

    # 🔄 Guardar estado inicial del modelo sin `inference_mode()`
    initial_state = {k: v.clone().detach() for k, v in learn.model.state_dict().items()}

    # 🔄 Guardar específicamente `head.1` para restaurarlo manualmente
    head1_weight_before = learn.model.head[1].weight.clone().detach()
    head1_bias_before = learn.model.head[1].bias.clone().detach()

    # 🔄 Fijar semillas para asegurar reproducibilidad
    torch.manual_seed(42)
    np.random.seed(42)
    random.seed(42)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(42)

    # 🔄 Desactivar BatchNorm completamente (evita updates en `running_mean`)
    for module in learn.model.modules():
        if isinstance(module, (torch.nn.BatchNorm1d, torch.nn.BatchNorm2d)):
            module.track_running_stats = False  # No acumular stats
            module.momentum = 0  # Asegurar que no haya drift en estadísticas
            module.eval()

    # 🔄 Asegurar que el `valid_dl` no tenga shuffle ni drop_last
    learn.dls.valid.shuffle_fn = lambda x: x  # Desactivar mezcla de datos
    learn.dls.valid.drop_last = False

    # 🔄 Poner el modelo en modo evaluación
    learn.model.eval()

    # 🔄 Guardar configuración de CUBLAS antes de modificarla
    old_value = os.environ.get("CUBLAS_WORKSPACE_CONFIG")
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"  # Activar determinismo

    try:
        # 🚀 Ejecutar validación en modo determinista
        with torch.no_grad(), torch.inference_mode():
            result = learn.validate(ds_idx = idx, cbs = cbs, dl = dl)
    finally:
        # 🔄 Restaurar la variable de entorno original
        if old_value is not None:
            os.environ["CUBLAS_WORKSPACE_CONFIG"] = old_value
        else:
            del os.environ["CUBLAS_WORKSPACE_CONFIG"]

        # 🔄 Restaurar `head.1` manualmente evitando problemas con `inference_mode`
        for name, param in learn.model.named_parameters():
            if name == "head.1.weight":
                param.data = head1_weight_before.clone().to(param.device)
            elif name == "head.1.bias":
                param.data = head1_bias_before.clone().to(param.device)
            elif name in initial_state:
                param.data = initial_state[name].clone().to(param.device)

    return result  # 📊 Devolver resultado de la validación

enc.model, dls = move_to_cpu(enc.model, dls)
# 🔄 Probar la validación varias veces
learn = deepcopy(learn_bkup)
for i in range(10):
    result = mvp_validate_determinist(learn, 1, None, dls)
    print(f"Iteración {i+1} - Loss:", result)

✅ Todo ha sido movido a CPU correctamente.


not enough values to plot a chart


TypeError: MVP._loss() takes 3 positional arguments but 19 were given

In [None]:
def check_layers(learn):
    # ANSI escape codes para color rojo y negrita en la consola
    RED = "\033[91m"
    BOLD = "\033[1m"
    RESET = "\033[0m"

    seen_layers = set()
    param_dict = {name: param.shape for name, param in learn.model.named_parameters()}

    def recurse_layers(module, prefix="", depth=0):
        """Recorre todas las capas del modelo y muestra solo sus parámetros directos."""
        for name, layer in module.named_children():  # Obtiene todos los submódulos
            full_name = f"{prefix}.{name}" if prefix else name

            # Aplicar negrita solo la primera vez que aparece un nombre de capa
            formatted_name = f"{BOLD}{name}{RESET}" if full_name not in seen_layers else name
            seen_layers.add(full_name)

            indentation = "\t" * depth  # Agregar tabulación según la profundidad
            
            # Filtrar solo los parámetros **directamente dentro** de esta capa
            param_info = [
                f"{n.split('.')[-1]}: {param_dict[n]}"
                for n in param_dict
                if n.startswith(full_name) and n.count('.') == full_name.count('.') + 1
            ]
            param_text = f" | Parámetros: {', '.join(param_info)}" if param_info else ""

            print(f"{indentation}[{depth}] Capa {formatted_name} {RED}[{type(layer).__name__}]{RESET}{param_text}")

            # Recursivamente mostrar las subcapas
            recurse_layers(layer, full_name, depth + 1)

    # Iniciar el recorrido desde el modelo completo
    recurse_layers(learn.model)

check_layers(learn)


In [None]:
import random

In [None]:
enc.dls.valid

In [None]:
enc.validate()

In [None]:
def cases_loop(
    model, 
    n_epochs_list, 
    dataset_percents, 
    masked_percents, 
    n_sizes_list, 
    summarized = True, 
    do_beep = True, 
    verbose = 1,
    save = True,
    file_errors = "",
    file_results = ""
):
    mssg = ut.Mssg(verbose = verbose, level = -1)
    result_columns = [
        'model_size','n_epochs','dataset_percent','masked_percent','n_windows', 
        'time',
        'first_train_loss','first_mse','first_rmse','first_mae','first_smape', 
        'last_train_loss','last_mse','last_rmse','last_mae','last_smape'
    ]
    result_columns = result_columns if summarized else result_columns + ['losses','eval_results_pre','eval_results_post']
    results = pd.DataFrame(columns = result_columns)
    
    errors = pd.DataFrame(
        columns = [
            'model_size',
            'n_epochs',
            'dataset_percent',
            'masked_percent',
            'n_windows',
            'windows',
            'error'
        ]
    )
    model_backup = deepcopy(model)
    i = 0
    for n_epochs in n_epochs_list:
        for dataset_percent in dataset_percents:
            print(dataset_percent)
            for masked_percent in masked_percents:
                model.mask_generator = Masking(mask_ratio = masked_percent)
                for sizes in n_sizes_list:
                    print(f"--> epoch {n_epochs}, dataset_percent {dataset_percent}, mask {masked_percent}")
                    redmssg = f" sizes {sizes}"
                    redmssg = f"\033[91m{redmssg}\033[0m"
                    print(redmssg)

                    print(f"Cuda memmory allocated: {torch.cuda.memory_allocated()}")
                    model_case = deepcopy(model_backup)
                    case = {
                            'model_size': "small",
                            'n_epochs': n_epochs,
                            'dataset_percent': dataset_percent,
                            'masked_percent': masked_percent,
                            'n_windows': sizes,
                            'windows': None
                           }
                    result_dict = deepcopy(case)
                    error_dict = deepcopy(case)
                    error = False
                    print(1-dataset_percent)
                    torch.cuda.synchronize()
                    result = fine_tune(
                        enc_learn           = model_case,
                        window_mask_percent = masked_percent,
                        training_percent    = dataset_percent,
                        validation_percent  = 0.3,
                        num_epochs          = n_epochs,
                        n_window_sizes      = sizes,
                        verbose             = verbose,
                        register_errors     = True,
                        save_best_or_last   = True, # only available for moment,
                        #force_best_lr       = True,
                        **common_args    
                    )
                    common_args['print_mode']='a'

                    default_error = pd.DataFrame([{
                        'model_size': case['model_size'], 
                        'n_epochs': case['n_epochs'],
                        'dataset_percent': case['dataset_percent'],
                        'masked_percent': case['masked_percent'],
                        'n_windows': sizes,
                        'windows': "Unknown",
                        'error': 'Non registered error',
                        'window': "Unknown"
                    }])

                    try:
                        print("---- Returned internal errors ---")
                        internal_errors = result[10]
                        
                    except:
                        internal_errors = default_error
            
                    print("Check:", result[0])
                    if len(result[0]) > 0:
                    
                        result_dict.update({
                            'time'             : result[4],
                            'windows'          : result[8].cpu() if isinstance(result[8], torch.Tensor) else result[8],
                            'first_train_loss' : result[0][0][0].cpu().item() if torch.is_tensor(result[0][0][0]) else result[0][0][0],
                            'last_train_loss'  : result[0][-1][-1].cpu().item() if torch.is_tensor(result[0][-1][-1]) else result[0][-1][-1],
                            'best_epochs'       : result[9],
                            'train_losses'      : result[0][0],
                            'eval_pre'          : result[1],
                            'eval_post'         : result[2],
                            'full_result'       : result
                        })
                        if result[1] == {}:
                            result_dict.update({
                                'first_eval_loss'  : np.nan,
                                'first_mse'        : np.nan,
                                'first_rmse'       : np.nan,
                                'first_mae'        : np.nan
                            })
                        else:
                            print("N windows: ", len(result[8]))
                            print("Loss: ", result[1]['loss'])
                            result_dict.update({
                                'first_eval_loss'  : result[1]['loss'][-1].cpu().item() if torch.is_tensor(result[1]['loss']) else result[1]['loss'][-1],
                                'first_mse'        : result[1]['mse'][-1].cpu().item() if torch.is_tensor(result[1]['mse']) else result[1]['mse'][-1],    
                                'first_rmse'       : result[1]['rmse'][-1].cpu().item() if torch.is_tensor(result[1]['rmse']) else result[1]['rmse'][-1],
                                'first_mae'        : result[1]['mae'][-1].cpu().item() if torch.is_tensor(result[1]['mae']) else result[1]['mae'][-1],                                
                                'first_smape'      : result[1]['smape'].cpu().item() if torch.is_tensor(result[1]['smape']) else result[1]['smape']
                            })
                        if result[2] == {}:
                            result_dict.update({
                                'last_eval_loss'  : np.nan,
                                'last_mse'        : np.nan,
                                'last_rmse'       : np.nan,
                                'last_mae'        : np.nan
                            })
                        else:
                            result_dict.update({
                                'last_eval_loss'   : result[2]['loss'][-1].cpu().item() if torch.is_tensor(result[2]['loss'][-1]) else result[2]['loss'][-1],
                                'last_mse'         : result[2]['mse'][-1].cpu().item() if torch.is_tensor(result[2]['mse'][-1]) else result[2]['mse'][-1],
                                'last_rmse'        : result[2]['rmse'][-1].cpu().item() if torch.is_tensor(result[2]['rmse'][-1]) else result[2]['rmse'][-1],
                                'last_mae'         : result[2]['mae'][-1].cpu().item() if torch.is_tensor(result[2]['mae'][-1]) else result[2]['mae'][-1],
                                'last_smape'       : result[2]['smape'][-1].cpu().item() if torch.is_tensor(result[2]['smape'][-1]) else result[2]['smape'][-1]
                            })
                            
        
                        if not summarized:
                            result_dict.update({
                                'losses'           : [[v.cpu().item() if torch.is_tensor(v) else v for v in loss] for loss in result[0]],
                                'eval_results_pre' : {k: v.cpu().item() if torch.is_tensor(v) else v for k, v in result[1].items()},
                                'eval_results_post': {k: v.cpu().item() if torch.is_tensor(v) else v for k, v in result[2].items()},
                                })  
                        results = pd.concat([results, pd.DataFrame([result_dict])], ignore_index=True)
                    else:
                        print(f"Failed case | N Errors {errors.shape[0]} | N Results { results.shape[0] }")
                        # Attach possible errors
                        internal_errors['model_size'] = case['model_size']
                        internal_errors['n_epochs'] = case['n_epochs']
                        internal_errors['dataset_percent'] = case['dataset_percent']
                        internal_errors['masked_percent'] = case['masked_percent']
                        internal_errors['windows'] = [result[8]]*len(internal_errors)
                        print("--- Internal ---")
                        if (internal_errors.empty):
                            print("Returned errors empty")
                            internal_errors = default_error
                        display(internal_errors)
                        errors = pd.concat([errors, internal_errors])
                        print("--- Concatenated ---")
                        display(errors)
                        print(f"Failed case | N Errors {errors.shape[0]} | N Results { results.shape[0] } ")
                    if not error: mssg.print_error(f" case {case} | time: {result[4]}")
                    before = torch.cuda.memory_allocated()
                    model_case = None
                    gc.collect()
                    torch.cuda.empty_cache()
                    display(results)
                    if do_beep:
                        beep(1)
                    mssg.print(f"epoch {n_epochs}, dataset_percent {dataset_percent}, mask {masked_percent}, sizes {sizes} -->")
                if save:
                    mssg.print(f"Update results into {file_results}")
                    results.to_csv(file_results, index = False, header = True)
                    mssg.print(f"Update errors into {file_errors}")
                    errors.to_csv(file_errors, index = False, header = True)
                if do_beep:
                    beep(2)
                    beep(2)
                mssg.print(f"epoch {n_epochs}, dataset_percent {dataset_percent}, mask {masked_percent} -->")
            if do_beep:
                beep(3)
                beep(3)
                beep(3)
            mssg.print(f"epoch {n_epochs}, dataset_percent {dataset_percent}-->")
        if do_beep:
            beep(4)
            beep(4)
            beep(4)
            beep(4)
        mssg.print(f"epoch {n_epochs}-->")
    if do_beep:
        beep(1000)
        beep(1000)
        beep(1000)
        beep(1000)
        beep(1000)
    model_backup = None
    gc.collect()
    torch.cuda.empty_cache()
    return results, errors