In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam

import lightning as L
from torch.utils.data import TensorDataset, DataLoader

import json
import polars as pl
import pandas as pd

In [None]:
# Read the shared run configuration that sits one directory above the notebook.
with open('../params.json') as file:
    params = json.load(file)

# Unpack the three settings used to build data paths below.
DATASET = params['dataset']
VERSION = params['version']
DATA_FOLD = params['data_folder']

print(f'Working on {DATASET} dataset {VERSION}')

In [None]:
# Location of the SAITS-imputed table, derived from the run parameters.
IMPUTED_DATASET = (
    f'{DATA_FOLD}/{VERSION}/3.analysis/imputation_48/{DATASET}'
    '/tables/first_48_with_static_imputed_saits.parquet'
)

In [None]:
# Load the imputed table (time-series rows plus static columns) as a polars DataFrame.
time_data = pl.read_parquet(IMPUTED_DATASET)

In [None]:
# Static per-encounter table (demographics, admission and death dates are read from it below).
# NOTE(review): this is an absolute, machine-specific path that ignores DATA_FOLD / VERSION /
# DATASET from params.json — presumably it should be built from them; confirm and parameterise.
demo = pl.read_parquet('/data2/poette.m/dypo/v4/2.clean_data/chu/static/clean_static_encounters.parquet')

In [None]:
# Whole days elapsed between admission (utcInTime, truncated to a date) and death.
days_to_death = (pl.col('date_deces') - pl.col('utcInTime')).dt.total_days()

# Per-encounter static features plus the 30-day mortality target `deces_j30`.
# Encounters without a death date yield a null difference, which is mapped to False.
y_data = (
    demo
    .filter(pl.col('gender').is_in(['Masculin', 'Féminin']))
    .select('encounterId', 'gender', 'age', 'admission_type', 'utcInTime', 'date_deces')
    .cast({'utcInTime': pl.Date})
    .with_columns(deces_j30=(days_to_death <= 30).fill_null(False))
    .drop('utcInTime', 'date_deces')
)

In [None]:
# Replace the static columns carried by the time-series table with the cleaned
# ones from `y_data` (which also brings the target), keeping only encounters
# present in both tables, then hand over to pandas for scikit-learn.
data = (
    time_data
    .drop('gender', 'age', 'admission_type')
    .join(y_data, on='encounterId', how='inner')
    .to_pandas()
)

In [None]:
# Row counts per admission type; dropna=False keeps missing values as their own category.
data['admission_type'].value_counts(dropna=False)

# Data Preparation

In [None]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Work on a copy so the joined `data` frame stays untouched when columns of
# `data_trans` are overwritten by the scaling / encoding below.
data_trans = data.copy()



In [None]:
# Standardise numeric columns and integer-encode categorical ones (target included).
#
# One transformer is fitted PER COLUMN and kept in `scalers` / `encoders`, so
# each mapping can later be inspected or inverted (e.g. `encoders['gender'].classes_`).
# Re-fitting a single shared instance, as before, silently discarded every
# mapping except the last one.
#
# NOTE(review): the transformers are fitted on the full table, i.e. before the
# train/test split further down, so test-set statistics leak into the scaling.
# Fitting on the training encounters only would require moving the split above
# this cell.
numeric_data = ['heart_rate', 'spo2', 'fr', 'pam', 'age']
cat_data = ['gender', 'admission_type', 'deces_j30']

scalers = {}   # column name -> fitted StandardScaler
encoders = {}  # column name -> fitted LabelEncoder

for n in numeric_data:
    scaler = StandardScaler()
    # fit_transform returns an (n_rows, 1) array; flatten it to a plain column.
    data_trans[n] = scaler.fit_transform(data_trans[[n]]).ravel()
    scalers[n] = scaler

for c in cat_data:
    le = LabelEncoder()
    data_trans[c] = le.fit_transform(data_trans[c])
    encoders[c] = le

In [None]:
# Row counts per admission_type value in `data_trans` (integer codes once the encoding cell has run).
data_trans.admission_type.value_counts()

In [None]:
# One row per (encounterId, deces_j30) pair: the unit on which the split is done.
encounter_df = data_trans.loc[:, ['encounterId', 'deces_j30']].drop_duplicates()

In [None]:
from sklearn.model_selection import train_test_split

# Fixed seed so that Restart & Run All reproduces the same train/test partition.
SPLIT_SEED = 42

# Split at the encounter level (not the row level) so that all time steps of a
# given encounter end up on the same side; stratify to keep the mortality rate
# comparable in both parts.
train, test = train_test_split(
    encounter_df,
    stratify=encounter_df['deces_j30'],
    test_size=0.3,
    random_state=SPLIT_SEED,
)

In [None]:
# Feature rows (one per encounter and time interval) and labels (one per encounter).
#
# Both frames are stably sorted by encounterId: the later reshape into
# (n_encounters, 48, n_features) needs every encounter's rows to be contiguous,
# and the label vector must follow the same encounter order as the sequences.
# Relying on the row order coming out of the join does not guarantee either.
# The stable sort preserves the existing within-encounter row order.
# NOTE(review): within an encounter, rows are assumed to already be in
# chronological `intervalle` order — confirm.
X = (
    data_trans
    .drop_duplicates(subset=['encounterId', 'deces_j30', 'intervalle'])
    .drop(columns='deces_j30')
    .sort_values('encounterId', kind='stable')
)
y = (
    data_trans[['encounterId', 'deces_j30']]
    .drop_duplicates()
    .sort_values('encounterId', kind='stable')
)

# Row-level features for the encounters assigned to each side of the split.
X_train_2d = X[X['encounterId'].isin(train['encounterId'])]
X_test_2d = X[X['encounterId'].isin(test['encounterId'])]

# Matching label vectors, in the same encounter order as the feature frames.
y_train = y[y['encounterId'].isin(train['encounterId'])]['deces_j30'].to_numpy()
y_test = y[y['encounterId'].isin(test['encounterId'])]['deces_j30'].to_numpy()


In [None]:
# Visual check of the row-level training features (still includes encounterId and intervalle).
X_train_2d

In [None]:
SEQ_LEN = 48  # time steps per encounter expected by the reshape


def to_sequences(frame_2d, seq_len=SEQ_LEN):
    """Turn a long-format frame into a ``(n_encounters, seq_len, n_features)`` array.

    Args:
        frame_2d: DataFrame with ``encounterId`` and ``intervalle`` columns; every
            other column is treated as a feature.
        seq_len: number of consecutive rows that make up one encounter.

    Returns:
        numpy array of shape ``(n_encounters, seq_len, n_features)``.

    Raises:
        ValueError: if the rows do not form contiguous blocks of ``seq_len``
            rows sharing one ``encounterId``. A bare ``reshape`` would not
            notice and would silently mix encounters.
    """
    ids = frame_2d['encounterId'].to_numpy()
    if len(ids) % seq_len != 0:
        raise ValueError(
            f'{len(ids)} rows cannot be split into sequences of {seq_len} time steps'
        )
    id_blocks = ids.reshape(-1, seq_len)
    if not (id_blocks == id_blocks[:, :1]).all():
        raise ValueError(
            f'rows are not grouped in contiguous blocks of {seq_len} per encounterId'
        )
    features = frame_2d.drop(columns=['encounterId', 'intervalle'])
    # Feature width is taken from the frame instead of being hard-coded.
    return features.to_numpy().reshape(-1, seq_len, features.shape[1])


X_train = to_sequences(X_train_2d)
X_test = to_sequences(X_test_2d)

In [None]:
# Training tensors: float32 features and the label vector with its numpy dtype.
inputs = torch.tensor(X_train, dtype=torch.float32)
labels = torch.tensor(y_train)

In [None]:
# There must be exactly one label per training sequence.
assert len(inputs) == len(y_train), 'number of sequences and labels differ'

# Show both lengths; a cell only displays its last expression, so two bare
# `len(...)` lines would hide the first one.
len(inputs), len(y_train)

In [None]:
BATCH_SIZE = 64  # sequences per optimisation step

dataset = TensorDataset(inputs, labels)
# DataLoader defaults to batch_size=1 and shuffle=False, i.e. one optimiser
# step per encounter, always in the same order. Mini-batches with per-epoch
# shuffling are faster and avoid order-dependent training.
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# LSTM by Hand

In [None]:
# Size of the first axis of X_train (one entry per training sequence after the reshape).
len(X_train)

In [None]:
# Full shape of the training array; the reshape above makes it (n_sequences, 48, 7).
X_train.shape

In [None]:

class LSTM(L.LightningModule):
    """Binary sequence classifier: an LSTM followed by a linear read-out.

    The network takes batches shaped ``(batch, seq_len, input_size)`` and
    returns one raw logit per sequence. The loss is ``BCEWithLogitsLoss``, so
    apply ``torch.sigmoid`` to the output to obtain probabilities.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the LSTM hidden state.
        lr: learning rate handed to Adam.
    """

    def __init__(self, input_size=7, hidden_size=1, lr=1e-3):
        super().__init__()
        self.lr = lr
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        # An LSTM hidden state is o_t * tanh(c_t) and therefore lies in (-1, 1).
        # Used directly as a logit it caps the predicted probability to roughly
        # (0.27, 0.73); the linear head maps it to an unbounded logit.
        self.head = nn.Linear(hidden_size, 1)
        self.loss_fn = nn.BCEWithLogitsLoss()

    def forward(self, input):
        """Return one logit per sequence for ``input`` of shape (batch, seq_len, input_size)."""
        lstm_out, _ = self.lstm(input)       # (batch, seq_len, hidden)
        last_step = lstm_out[:, -1, :]       # hidden state after the final time step
        return self.head(last_step).squeeze(-1)  # (batch,)

    def configure_optimizers(self):
        """Optimise all parameters with Adam at the configured learning rate."""
        return Adam(self.parameters(), lr=self.lr)

    def _logits_labels_loss(self, batch):
        """Run the forward pass on a ``(inputs, labels)`` batch and compute the BCE loss."""
        input_i, label_i = batch
        input_i = input_i.float()
        label_i = label_i.float()  # BCEWithLogitsLoss requires float targets
        output_i = self(input_i)
        return output_i, label_i, self.loss_fn(output_i, label_i)

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss for one batch."""
        _, _, loss = self._logits_labels_loss(batch)

        self.log("train_loss", loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss and accuracy at a 0.5 threshold."""
        output_i, label_i, loss = self._logits_labels_loss(batch)

        # Cast the thresholded predictions to float so they are compared with
        # the float labels in the same dtype.
        preds = (torch.sigmoid(output_i) > 0.5).float()
        acc = (preds == label_i).float().mean()

        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", acc, prog_bar=True)
        return loss


In [None]:
# Instantiate the classifier with its default constructor arguments.
model = LSTM()

In [None]:
# Train for 5 epochs, logging metrics every 2 optimisation steps.
trainer = L.Trainer(max_epochs=5, log_every_n_steps=2)

# NOTE(review): only a training dataloader is supplied, so no validation data is
# available to the trainer here and `validation_step` is presumably never run —
# pass `val_dataloaders=` if val_loss / val_acc are wanted.
trainer.fit(model, train_dataloaders=dataloader)

In [None]:
# Test sequences as a float32 tensor, ready to be fed to the model.
predict = torch.tensor(X_test, dtype=torch.float32)

In [None]:
# Inference only: switch to eval mode and disable autograd so no computation
# graph is built for the whole test set. `pred` holds raw logits (one per
# test sequence); apply torch.sigmoid to get probabilities.
model.eval()
with torch.no_grad():
    pred = model(predict)

In [None]:
# Size of the first axis of X_test (one entry per test sequence after the reshape).
len(X_test)

In [None]:
import numpy as np

# The model is trained with BCEWithLogitsLoss, so `pred` contains logits.
# Convert them to probabilities before subtracting the 0/1 labels; otherwise
# the difference mixes two different scales and is not a meaningful residual.
arr = (torch.sigmoid(pred) - torch.tensor(y_test)).detach().numpy()