# Descargar los datos

Confirmo que pueda hacer uso de la GPU

In [1]:
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import torch
print(f"GPUs disponibles: {torch.cuda.device_count()}")
print(f"Está disponible: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"Nombre: {torch.cuda.get_device_name(0)}")

GPUs disponibles: 1
Está disponible: True
Nombre: NVIDIA RTX A5000


Imports

In [2]:
import os
import json
from pathlib import Path

import numpy as np
import pandas as pd
import mne
import torch

from braindecode.datasets.base import BaseDataset, BaseConcatDataset
from braindecode.preprocessing import create_fixed_length_windows
from braindecode.models import EEGNet
from braindecode.util import set_random_seeds
from braindecode import EEGClassifier
from braindecode.training import CroppedLoss

from skorch.helper import predefined_split
from skorch.callbacks import LRScheduler
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

Hacer la concatenación de la base de datos

In [3]:
data_path = Path("preprocessed_data")

mapping = {"bajo": 0, "medio": 1, "alto": 2}

datasets_list = []

for fif_path in data_path.glob("*_standardized_eeg.fif"):
    subj = fif_path.stem.split("_standardized_eeg")[0]
    raw = mne.io.read_raw_fif(fif_path, preload=False)

    with open(data_path / f"{subj}_description.json", "r") as f:
        desc = json.load(f)

    if isinstance(desc["p_factor_category"], str):
        desc["p_factor_category"] = mapping[desc["p_factor_category"]]
    
    # desc["target"] = desc["p_factor_category"]

    ds = BaseDataset(raw=raw, description=desc, target_name="p_factor_category")
    datasets_list.append(ds)

dataset = BaseConcatDataset(datasets_list)
print("Sujetos cargados:", len(dataset.datasets))

Opening raw data file preprocessed_data/NDARHT524ET0_standardized_eeg.fif...
Isotrak not found
    Range : 0 ... 35199 =      0.000 ...   351.990 secs
Ready.


Opening raw data file preprocessed_data/NDARRD943ZWU_standardized_eeg.fif...
Isotrak not found
    Range : 0 ... 39199 =      0.000 ...   391.990 secs
Ready.
Opening raw data file preprocessed_data/NDARXV294YRJ_standardized_eeg.fif...
Isotrak not found
    Range : 0 ... 38299 =      0.000 ...   382.990 secs
Ready.
Opening raw data file preprocessed_data/NDARFT305CG1_standardized_eeg.fif...
Isotrak not found
    Range : 0 ... 41099 =      0.000 ...   410.990 secs
Ready.
Opening raw data file preprocessed_data/NDARVG127VW5_standardized_eeg.fif...
Isotrak not found
    Range : 0 ... 35799 =      0.000 ...   357.990 secs
Ready.
Opening raw data file preprocessed_data/NDARXK254YD2_standardized_eeg.fif...
Isotrak not found
    Range : 0 ... 44599 =      0.000 ...   445.990 secs
Ready.
Opening raw data file preprocessed_data/NDAREL164JM1_standardized_eeg.fif...
Isotrak not found
    Range : 0 ... 34899 =      0.000 ...   348.990 secs
Ready.
Opening raw data file preprocessed_data/NDARXV034WE0

Voy a quitar los sujetos con menos de 8 segundos de datos

In [4]:
SFREQ = 100

In [5]:
#Creo el nuevo dataset
dataset = BaseConcatDataset(
    [
        ds for ds in dataset.datasets
        if ds.raw.n_times >= 4 * SFREQ
    ]
)

Modelo

In [6]:
n_times = 400
import torch

from braindecode.models import EEGNet
from braindecode.util import set_random_seeds

cuda = torch.cuda.is_available()  # check if GPU is available, if True chooses to use it
device = "cuda" if cuda else "cpu"
if cuda:
    torch.backends.cudnn.benchmark = True
# Set random seed to be able to roughly reproduce results
# Note that with cudnn benchmark set to True, GPU indeterminism
# may still make results substantially different between runs.
# To obtain more consistent results at the cost of increased computation time,
# you can set `cudnn_benchmark=False` in `set_random_seeds`
# or remove `torch.backends.cudnn.benchmark = True`
seed = 20200220
set_random_seeds(seed=seed, cuda=cuda)

n_classes = 3
classes = list(range(n_classes))
# Extract number of chans from dataset
n_chans = dataset[0][0].shape[0]

model = EEGNet(
    n_chans,
    n_classes,
    n_times=n_times,
    final_conv_length="auto",
)

# # Display torchinfo table describing the model
# print(model)

# Send model to GPU
if cuda:
    _ = model.cuda()

  warn(


In [7]:
#Paso a predicciones de cropped
model.to_dense_prediction_model()

In [8]:
#Miro el stride
n_preds_per_input = model.get_output_shape()[2]
n_preds_per_input

ValueError: During model prediction RuntimeError was thrown showing that at some layer ` Kernel size can't be greater than actual input size` (see above in the stacktrace). This could be caused by providing too small `n_times`/`input_window_seconds`. Model may require longer chunks of signal in the input than (1, 128, 400).

Creo las ventanas

In [13]:
SFREQ = 100
windows_dataset = create_fixed_length_windows(
    dataset,
    window_size_samples=n_times,
    window_stride_samples=250,
    drop_last_window=False,
    preload=False,
)

Hago el split del dataset en train y validación

In [14]:
description = windows_dataset.description
labels = description["p_factor_category"].to_list()

from sklearn.model_selection import train_test_split

train_idx, valid_idx = train_test_split(
    np.arange(len(windows_dataset.datasets)),
    test_size=0.2,
    stratify=labels,
    random_state=42
)

splitted = {
    "train": BaseConcatDataset([windows_dataset.datasets[i] for i in train_idx]),
    "valid": BaseConcatDataset([windows_dataset.datasets[i] for i in valid_idx]),
}

train_set = splitted["train"]
valid_set = splitted["valid"]

In [15]:
from skorch.callbacks import LRScheduler, PrintLog
from skorch.helper import predefined_split

from braindecode import EEGClassifier
from braindecode.training import CroppedLoss

# These values we found good for shallow network:
lr = 0.0625 * 0.01
weight_decay = 0

# For deep4 they should be:
# lr = 1 * 0.01
# weight_decay = 0.5 * 0.001

batch_size = 32
n_epochs = 2

clf = EEGClassifier(
    model,
    cropped=True,
    criterion=CroppedLoss,
    criterion__loss_function=torch.nn.functional.cross_entropy,
    optimizer=torch.optim.AdamW,
    train_split=predefined_split(valid_set),
    optimizer__lr=lr,
    optimizer__weight_decay=weight_decay,
    iterator_train__shuffle=True,
    batch_size=batch_size,
    callbacks=[
        "accuracy",
        ("lr_scheduler", LRScheduler("CosineAnnealingLR", T_max=n_epochs - 1)),
        PrintLog()
    ],
    device=device,
    classes=classes,
)

torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.benchmark = False  # reduce picos de memoria
torch.backends.cudnn.enabled = True
# Model training for a specified number of epochs. ``y`` is None as it is already supplied
# in the dataset.
_ = clf.fit(train_set, y=None, epochs=n_epochs)

: 

Prueba que esté tomando bien el target

In [None]:
print(type(windows_dataset))
print(len(windows_dataset))
print(windows_dataset[0][0].shape)  # primera ventana, datos
print(windows_dataset[0][1])        # primera ventana, target

<class 'braindecode.datasets.base.BaseConcatDataset'>
147390
(128, 1000)
-1


In [None]:
#Vpy a guardar los valores que está tomando como target yw
all_targets = []

for i in range(len(windows_dataset)):
    _, y, _ = windows_dataset[i]
    all_targets.append(int(y))  # convertir a int por seguridad

print(len(all_targets))
print(set(all_targets))  # valores únicos

147390
{-1}


In [None]:
print(windows_dataset.description["target"].unique())
print(windows_dataset.description["target"].dtype)

[1 2 0]
int64


In [None]:
print(windows_dataset.datasets[0].description)

subject                      NDARHT524ET0
task                         RestingState
age                                6.5013
sex                                     M
p_factor                             0.46
release_number                         R9
ehq_total                           51.14
commercial_use                        Yes
full_pheno                            Yes
attention                           1.361
internalizing                      -0.194
externalizing                      -0.731
restingstate                    available
despicableme                    available
funwithfractals                 available
thepresent                      available
diaryofawimpykid                available
contrastchangedetection_1       available
contrastchangedetection_2       available
contrastchangedetection_3       available
surroundsupp_1                  available
surroundsupp_2                  available
seqlearning6target              available
seqlearning8target            unav

In [None]:
n_preds_per_input

19

# Código viejo

Crear las ventanas

In [None]:
SFREQ = 100
window_size_samples = 4 * SFREQ   # 16s
window_stride_samples = 2 * SFREQ # 8s

windows_dataset = create_fixed_length_windows(
    dataset,
    window_size_samples=window_size_samples,
    window_stride_samples=window_stride_samples,
    drop_last_window=False,
    preload=True,
)

Hago el split solo train-val

In [None]:
description = windows_dataset.description
labels = description["p_factor_category"].to_list()

from sklearn.model_selection import train_test_split

train_idx, valid_idx = train_test_split(
    np.arange(len(windows_dataset.datasets)),
    test_size=0.2,
    stratify=labels,
    random_state=42
)

splitted = {
    "train": BaseConcatDataset([windows_dataset.datasets[i] for i in train_idx]),
    "valid": BaseConcatDataset([windows_dataset.datasets[i] for i in valid_idx]),
}

train_set = splitted["train"]
valid_set = splitted["valid"]

Creo el modelo y lo paso a predicción densa

In [None]:
cuda = torch.cuda.is_available()
device = "cuda" if cuda else "cpu"

set_random_seeds(seed=42, cuda=cuda)

n_chans = 128
n_classes = 3
n_times = window_size_samples  # 4s

model = EEGNet(
    n_chans=n_chans,
    n_outputs=n_classes,
    n_times=n_times
)

if cuda:
    model.cuda()

model.to_dense_prediction_model()
n_preds_per_input = model.get_output_shape()[2]

print("Predictions por input:", n_preds_per_input)

  warn(


ValueError: During model prediction RuntimeError was thrown showing that at some layer ` Kernel size can't be greater than actual input size` (see above in the stacktrace). This could be caused by providing too small `n_times`/`input_window_seconds`. Model may require longer chunks of signal in the input than (1, 128, 4000).

In [None]:
item = train_set.datasets[0][0]
print(type(item))
print(len(item))
print(item)

<class 'tuple'>
3
(array([[ 4.50527878e-05,  1.44036067e-05,  2.90817989e-05, ...,
        -1.18494308e-05, -2.10916169e-05, -2.50878948e-05],
       [ 8.06073003e-05,  2.47137177e-06,  1.36701274e-05, ...,
        -1.48206955e-05, -1.72209966e-05, -1.44601863e-05],
       [ 7.88024117e-05,  3.02632998e-06,  1.07903716e-05, ...,
        -4.90740877e-06, -9.68882705e-06, -1.26947170e-05],
       ...,
       [ 9.08524016e-05,  3.54386029e-05,  7.69710095e-05, ...,
         3.82810940e-05,  2.99909352e-05,  2.93819248e-05],
       [ 5.07642726e-05,  1.49874231e-05,  3.18266466e-05, ...,
        -4.01048979e-04, -3.97911441e-04, -3.88309301e-04],
       [ 4.28216772e-05,  4.11674591e-06,  1.63652112e-05, ...,
        -1.74224100e-04, -1.71812702e-04, -1.63363104e-04]],
      shape=(128, 4000), dtype=float32), -1, [0, 0, 4000])


# Código para probar cuánta GPU usar

In [None]:
import torch
import math

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Ajusta según tu caso ---
n_chans = 128
n_samples = 200   # microventana de 2s a 100 Hz
n_outputs = 3     # p-factor categórico
dtype = torch.float32

# --- Define o importa tu modelo aquí ---
# Ejemplo: model = SyncNet(n_chans=n_chans, n_outputs=n_outputs, input_window_samples=n_samples).to(device)
# Reemplaza por el modelo que usarás.
model = model.to(device)  # asume que ya tienes model creado

# --- Función que prueba un batch y devuelve peak memory en MB ---
def test_batch_memory(batch_size):
    try:
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats(device)
        X = torch.randn(batch_size, n_chans, n_samples, dtype=dtype, device=device)
        y = torch.randint(0, n_outputs, (batch_size,), device=device)

        model.train()
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)  # solo para backward
        optimizer.zero_grad()
        out = model(X)
        loss = torch.nn.functional.cross_entropy(out, y)
        loss.backward()
        optimizer.step()

        peak_mb = torch.cuda.max_memory_allocated(device) / (1024**2)
        return True, peak_mb
    except RuntimeError as e:
        # Captura OOM
        if 'out of memory' in str(e).lower():
            torch.cuda.empty_cache()
            return False, None
        else:
            raise

# --- Búsqueda binaria para batch máximo seguro ---
def find_max_batch(min_b=1, max_b=2048, safety_margin=0.85):
    # Primero prueba max_b por si ya cabe
    lo, hi = min_b, max_b
    last_ok = min_b
    while lo <= hi:
        mid = (lo + hi) // 2
        ok, peak = test_batch_memory(mid)
        print(f"Testing batch={mid} -> ok={ok}, peak={peak}")
        if ok:
            last_ok = mid
            lo = mid + 1
        else:
            hi = mid - 1
    # Ahora medir el último_ok para saber el peak y aplicar safety margin
    ok, peak = test_batch_memory(last_ok)
    if not ok:
        return last_ok, None
    max_safe = int(last_ok * safety_margin)
    return last_ok, max_safe, peak

# --- Uso ejemplo ---
# Ojo: find_max_batch hará múltiples forward/backward (puede tardar un poco)
last_ok, max_safe, peak = find_max_batch(min_b=8, max_b=1024, safety_margin=0.8)
print("Último batch que cabe:", last_ok)
print("Batch 'seguro' sugerido (con margen):", max_safe)
print("Peak MB del último ok:", peak)


Testing batch=516 -> ok=True, peak=2280.416015625
Testing batch=770 -> ok=True, peak=3402.59765625
Testing batch=897 -> ok=True, peak=3962.60400390625
Testing batch=961 -> ok=True, peak=4245.05078125
Testing batch=993 -> ok=True, peak=4387.00146484375
Testing batch=1009 -> ok=True, peak=4456.9208984375
Testing batch=1017 -> ok=True, peak=4492.94140625
Testing batch=1021 -> ok=True, peak=4510.57470703125
Testing batch=1023 -> ok=True, peak=4520.02880859375
Testing batch=1024 -> ok=True, peak=4524.02880859375
Último batch que cabe: 1024
Batch 'seguro' sugerido (con margen): 819
Peak MB del último ok: 2524.62646484375
