<img src="assets/logo.png" width="300px">

# Supplementary 2
**BoutScout: A Deep Learning Framework for Automatic Detection of Incubation Events in Avian Nests Using Temperature Time Series**

Author: [Jorge Lizarazo](https://www.researchgate.net/profile/Jorge-Lizarazo-Borrero?ev=hdr_xprf)




In [40]:
import numpy as np
import glob
import os
import pandas as pd
import json
import ast
import torch.nn as nn
import matplotlib.pyplot as plt
import random
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
import torch
import torch.optim as optim

In [42]:
#print(torch.__version__)
#print(torch.cuda.is_available())
#print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU detected")

In [43]:
# Load the feature and label arrays.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects;
# only load these files from a trusted source.
X_array = np.load("X_array_cleaned.npy", allow_pickle=True)
y_array = np.load("y_array_cleaned.npy", allow_pickle=True)

# Load the LabelEncoder classes: the encoder is not refit here, its classes_
# attribute is filled directly from the saved file.
le = LabelEncoder()
le.classes_ = np.load("label_classes.npy", allow_pickle=True)

# Define the Dataset
class NestEventDataset(Dataset):
    """Map-style dataset pairing each feature array with its label array.

    The two containers are indexed by the same integer position. Entries are
    converted to tensors lazily, at access time.
    """

    def __init__(self, X_array, y_array):
        # Only references are kept; nothing is copied or converted here.
        self.X_array, self.y_array = X_array, y_array

    def __len__(self):
        """Return the number of entries, taken from the feature container."""
        return len(self.X_array)

    def __getitem__(self, idx):
        """Return ``(features, labels)`` for position ``idx`` as tensors.

        Features are cast to float32 and labels to int64 before conversion.
        """
        features = torch.tensor(self.X_array[idx].astype(np.float32))
        labels = torch.tensor(self.y_array[idx].astype(np.int64))
        return features, labels

# Create the dataset over the full loaded arrays
dataset = NestEventDataset(X_array, y_array)

In [44]:

# Pull one random entry through the Dataset wrapper and view both tensors as NumPy arrays.
idx = random.randrange(len(dataset))
X_sample, y_sample = dataset[idx]
X_np, y_np = X_sample.numpy(), y_sample.numpy()

In [45]:
# Report how many entries the label array holds (the message calls them "days").
print(f"Número total de días (entradas): {len(y_array)}")

Número total de días (entradas): 2232


In [46]:
# Draw one random entry straight from the raw arrays (no Dataset, no tensors),
# casting features to float32 and labels to int64.
idx = random.randrange(len(X_array))
X_sample, y_sample = X_array[idx].astype(np.float32), y_array[idx].astype(np.int64)

# The *_np names are plain aliases of the sampled arrays.
X_np, y_np = X_sample, y_sample

In [47]:
# Notebook echo: display the label vector of the randomly sampled entry.
y_np

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [48]:
class BiLSTMModel(nn.Module):
    """Bidirectional LSTM followed by a linear layer applied to every time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per LSTM direction.
        num_layers: Number of stacked LSTM layers.
        num_classes: Size of the per-time-step output vector.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        recurrent_options = dict(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        self.lstm = nn.LSTM(**recurrent_options)
        # Both directions are concatenated by the LSTM, hence twice the hidden size.
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        """Return the linear layer's output for each time step of ``x``.

        The LSTM is built with ``batch_first=True``, so batched input is laid
        out with the batch dimension first. The final hidden/cell state is
        discarded.
        """
        per_step_features, _final_state = self.lstm(x)
        return self.fc(per_step_features)

In [49]:
# Model hyperparameters; the feature count is read from the second axis of the first entry.
input_size = X_array[0].shape[1]  # number of features per minute (e.g. temperature, ambient, etc.)
hidden_size = 64
num_layers = 2
num_classes = 3  # 'Nocturnal', 'Off', 'On' — the 'Error' class was removed, so three classes remain

model = BiLSTMModel(input_size, hidden_size, num_layers, num_classes)

In [51]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Loss and optimizer bound to the model above.
# NOTE(review): the cross-validation loop later in this file builds a fresh model,
# optimizer and criterion for every fold, so these instances are not the ones
# trained there.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [52]:
from sklearn.model_selection import StratifiedKFold

import torch.nn as nn
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, precision_recall_curve, average_precision_score
from sklearn.metrics import classification_report
import os

In [53]:
# Create the figure output folders; existing folders are left untouched.
for figure_dir in (
    "figures/confusion_matrices",
    "figures/precision_recall",
    "figures/loss_curves",
):
    os.makedirs(figure_dir, exist_ok=True)

In [54]:
# Create the output folders if they do not exist yet.
os.makedirs("resultados_folds", exist_ok=True)
os.makedirs("modelos_folds", exist_ok=True)

# Destination of the best-scoring model across all folds. It lives in "modelos",
# not in the per-fold folder created above, so that folder is created here as
# well; otherwise saving to this path fails when "modelos" is missing.
ruta_mejor_modelo = "modelos/modelo_bilstm_cv_mejor_cross.pth"
os.makedirs(os.path.dirname(ruta_mejor_modelo), exist_ok=True)

# Best macro-F1 seen so far and the fold that produced it (-1 means none yet).
mejor_f1 = 0
mejor_fold = -1

# Dominant (most frequent) label of each entry; passed to the splitter as the
# stratification target.
y_mode = [np.bincount(y).argmax() for y in y_array]


skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
fold_scores = []  # macro-F1 of every fold, in fold order

In [None]:
# Stratified cross-validation: for every split, train a fresh BiLSTM, evaluate it on
# the held-out entries, persist the fold's artefacts and keep track of the best fold.
# Splits are stratified on y_mode (one label per entry); folds are numbered from 1.
for fold, (train_idx, val_idx) in enumerate(skf.split(X_array, y_mode), 1):
    print(f"\n=== Fold {fold}/5 ===")

    # Materialise the train/validation subsets as plain lists of per-entry arrays.
    X_train = [X_array[i] for i in train_idx]
    y_train = [y_array[i] for i in train_idx]
    X_val = [X_array[i] for i in val_idx]
    y_val = [y_array[i] for i in val_idx]

    train_ds = NestEventDataset(X_train, y_train)
    val_ds = NestEventDataset(X_val, y_val)

    # Only the training batches are shuffled; validation keeps its order.
    train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=16, shuffle=False)

    # New model, optimizer and loss for each fold so no weights carry over between folds.
    model = BiLSTMModel(input_size=X_array[0].shape[1], hidden_size=64, num_layers=2, num_classes=3).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    train_losses = []  # one loss value per batch, accumulated over all epochs of this fold

    for epoch in range(50):
        model.train()
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            # Flatten all leading dimensions so every time step is scored as its own sample.
            loss = criterion(outputs.view(-1, outputs.shape[-1]), y_batch.view(-1))
            loss.backward()
            train_losses.append(loss.item())
            optimizer.step()

    # Evaluation
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(device)
            outputs = model(X_batch)
            # Class with the highest score along the last (third) axis, flattened across the batch.
            preds = torch.argmax(outputs, dim=2).view(-1).cpu().numpy()
            labels = y_batch.view(-1).cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels)

        # These saves sit inside the no_grad block but run once, after the validation loop.
        np.save(f"resultados_folds/fold_{fold}_train_losses.npy", np.array(train_losses))
        np.save(f"resultados_folds/fold_{fold}_labels.npy", np.array(all_labels))
        np.save(f"resultados_folds/fold_{fold}_preds.npy", np.array(all_preds))

    # NOTE(review): target_names presumably follow the integer label order 0, 1, 2 —
    # confirm against the saved label classes.
    report = classification_report(all_labels, all_preds, target_names=['Nocturnal', 'Off', 'On'], zero_division=0, output_dict=True)
    f1_macro = report['macro avg']['f1-score']
    fold_scores.append(f1_macro)

    print(f"F1 macro (fold {fold}): {f1_macro:.4f}")

    # === Save this fold's results and model ===
    with open(f"resultados_folds/fold_{fold}_reporte.json", "w") as f:
        json.dump(report, f, indent=4)

    torch.save(model.state_dict(), f"modelos_folds/modelo_fold_{fold}.pth")

    # Strict comparison: on a tie the earlier fold's model is kept.
    # NOTE(review): the folder part of ruta_mejor_modelo must already exist when this
    # save runs — torch.save is not expected to create it; confirm.
    if f1_macro > mejor_f1:
        mejor_f1 = f1_macro
        mejor_fold = fold
        torch.save(model.state_dict(), ruta_mejor_modelo)
        print(f"✅ Guardado mejor modelo (F1: {mejor_f1:.4f}) en fold {mejor_fold}")

# Summary over all folds: mean macro-F1 and which fold produced the saved best model.
print(f"\n=== Promedio F1 macro en validación cruzada: {np.mean(fold_scores):.4f} ===")
print(f"🏆 Mejor modelo guardado fue del fold {mejor_fold} con F1 macro = {mejor_f1:.4f}")


=== Fold 1/5 ===
