In [None]:
# Paso 1: Ingesta y unificación de datos con polars
from pathlib import Path

import polars as pl

# Input/output locations. All three files live in the same raw-data directory,
# so the directory is spelled out once and the file paths are derived from it.
RAW_DATA_DIR = Path(r"C:\repos\physionet-sepsis-forecasting\data\raw")
setA_path = RAW_DATA_DIR / "all_patients_setA.parquet"
setB_path = RAW_DATA_DIR / "all_patients_setB.parquet"
unified_path = RAW_DATA_DIR / "all_patients_unified.parquet"

# Fail early with a readable message instead of an opaque reader error.
missing_inputs = [str(path) for path in (setA_path, setB_path) if not path.is_file()]
if missing_inputs:
    raise FileNotFoundError(f"Missing input parquet file(s): {', '.join(missing_inputs)}")

# Read both datasets.
df_a = pl.read_parquet(setA_path)
df_b = pl.read_parquet(setB_path)

# Stack the two sets row-wise. "vertical_relaxed" casts columns whose dtypes
# differ between the two files to a common supertype instead of failing on the
# mismatch; both frames must still have the same column names.
df = pl.concat([df_a, df_b], how="vertical_relaxed")

# Persist the unified dataset.
df.write_parquet(unified_path)
print(f"Dataset unificado guardado en {unified_path}")

PanicException: called `Result::unwrap()` on an `Err` value: ()

In [None]:
# Step 2: Exploratory analysis
df = pl.read_parquet(unified_path)

# Print, in this order: the first rows, the descriptive statistics and the
# per-column null counts of the unified dataset.
for summarize in (pl.DataFrame.head, pl.DataFrame.describe, pl.DataFrame.null_count):
    print(summarize(df))

In [None]:
# Paso 3: Limpieza y preprocesamiento
from sklearn.preprocessing import StandardScaler
import numpy as np

# Impute nulls (example: with the column mean).
# NOTE(review): the mean is taken over the whole frame, i.e. across every row
# that will later be used for training and evaluation -- confirm this is intended.
df_clean = df.fill_null(strategy="mean")

# Columns to standardize: every Float64/Int64 column except the target.
# The modelling step reads "SepsisLabel" as the label, so it must keep its
# original values and is excluded from scaling.
# NOTE(review): identifier-like numeric columns (if any) should be excluded too.
LABEL_COL = "SepsisLabel"
numeric_cols = [
    col
    for col, dtype in zip(df_clean.columns, df_clean.dtypes)
    if dtype in (pl.Float64, pl.Int64) and col != LABEL_COL
]

# Fit ONE scaler on all feature columns at once. StandardScaler standardizes
# each column independently, and after fitting it holds the statistics of every
# feature, so it can be reused later (e.g. inverse_transform or new data).
scaler = StandardScaler()
if numeric_cols:
    scaled = scaler.fit_transform(df_clean.select(numeric_cols).to_numpy())
    df_clean = df_clean.with_columns(
        [pl.Series(col, scaled[:, idx]) for idx, col in enumerate(numeric_cols)]
    )

# Save the processed dataset, creating the output directory when needed.
processed_path = "data/processed/all_patients_processed.parquet"
Path(processed_path).parent.mkdir(parents=True, exist_ok=True)
df_clean.write_parquet(processed_path)
print(f"Dataset procesado guardado en {processed_path}")

In [None]:
# Paso 4: Modelamiento LSTM y búsqueda de hiperparámetros
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

# Prepare data for the LSTM (simple example): every column except the label is
# a feature, and "SepsisLabel" is the target.
X = df_clean.drop("SepsisLabel").to_numpy()
y = df_clean["SepsisLabel"].to_numpy()

# Convert to tensors with the shapes the model consumes:
#   X_tensor: (n_rows, 1, n_features) -- nn.LSTM(batch_first=True) followed by
#             out[:, -1, :] needs a sequence axis; each row is fed as a
#             sequence of length 1.
#   y_tensor: (n_rows, 1) -- matches the (batch, 1) output of the final
#             Linear(hidden_dim, 1) layer, as element-wise losses require.
# NOTE(review): length-1 sequences give the LSTM no temporal context; real
# per-patient windows would have to be built upstream.
X_tensor = torch.tensor(X, dtype=torch.float32).unsqueeze(1)
y_tensor = torch.tensor(y, dtype=torch.float32).unsqueeze(1)

# Dataset and DataLoader
dataset = TensorDataset(X_tensor, y_tensor)
loader = DataLoader(dataset, batch_size=64, shuffle=True)

# LSTM model definition
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer and a sigmoid.

    The prediction is computed from the LSTM output at the last time step.
    """

    def __init__(self, input_dim, hidden_dim, num_layers):
        """Build the network.

        Args:
            input_dim: Number of features per time step.
            hidden_dim: Size of the LSTM hidden state.
            num_layers: Number of stacked LSTM layers.
        """
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Return one value in [0, 1] per sample.

        Args:
            x: Tensor of shape (batch, seq_len, input_dim). A 2-D tensor of
                shape (batch, input_dim) is also accepted and is treated as a
                batch of length-1 sequences.

        Returns:
            Tensor of shape (batch, 1) with the sigmoid of the linear output.
        """
        if x.dim() == 2:
            # Without a sequence axis the slice out[:, -1, :] below would fail;
            # promote each row to a sequence of length 1.
            x = x.unsqueeze(1)
        out, _ = self.lstm(x)
        # Keep only the last time step of every sequence.
        out = self.fc(out[:, -1, :])
        return torch.sigmoid(out)

# Hyperparameter search (manual example): a single hand-picked configuration.
input_dim = X.shape[1]  # size of X along axis 1
hidden_dim, num_layers = 64, 2
model = LSTMModel(input_dim=input_dim, hidden_dim=hidden_dim, num_layers=num_layers)

In [None]:
# Paso 5: Validación y selección
from sklearn.metrics import roc_auc_score, f1_score

# Training and validation (simplified)
# TODO: train the model here; no training loop exists in this cell yet.

# Predictions and metrics (kept commented out until the model is trained)
# y_pred = model(X_tensor).detach().numpy()
# auc = roc_auc_score(y, y_pred)
# f1 = f1_score(y, y_pred > 0.5)
# print(f"AUC: {auc}, F1: {f1}")