In [None]:
import pandas as pd
import numpy as np
import polars as pl
import seaborn as sns
import matplotlib.pyplot as plt
import json
from matplotlib.colors import Normalize
import tableone

# Load the shared run parameters; the path is relative to the notebook's
# working directory. The encoding is given explicitly so that decoding does
# not depend on the platform's default locale.
with open('../params.json', 'r', encoding='utf-8') as file:
    params = json.load(file)

# Dataset name and pipeline version select the sub-folders read below.
DATASET, VERSION = params['dataset'], params['version']
DATA_FOLD = params['data_folder']
# Parquet inputs: the static per-encounter table and the table stored under
# the imputation_48 analysis folder.
DEMO_DATA = f'{DATA_FOLD}/{VERSION}/2.clean_data/{DATASET}/static/clean_static_encounters.parquet'
TEMPORAL_DATA = f'{DATA_FOLD}/{VERSION}/3.analysis/imputation_48/{DATASET}/first_48h.parquet'


# Import Datasets

In [None]:
# Read the parquet file at TEMPORAL_DATA into a polars DataFrame.
data = pl.read_parquet(TEMPORAL_DATA)

# Patient descriptions

In [None]:
# Distinct encounter identifiers present in the loaded data.
id_list = data.get_column('encounterId').unique().to_list()

In [None]:
# Read the static table and keep only the rows whose encounterId is in id_list.
demo = (
    pl.read_parquet(DEMO_DATA)
    .filter(pl.col('encounterId').is_in(id_list))
)

In [None]:
# Number of rows per unitLabel value.
demo.get_column('unitLabel').value_counts()

In [None]:
# Add derived columns: admission year, discharge month, BMI and length of stay.
demo = demo.with_columns([
    pl.col('utcInTime').dt.year().alias('year_inTime'),
    pl.col('utcOutTime').dt.month().alias('month_outTime'),
    # taille is divided by 100 before squaring — presumably cm -> m; TODO confirm units
    (pl.col('poids_admission') / (pl.col('taille') / 100) ** 2).round(1).alias('bmi'),
    # los is divided by 24 — presumably hours -> days; TODO confirm units
    (pl.col('los') / 24).round(1).alias('los_days'),
])

In [None]:
# Display the demographics frame as the cell output.
demo

In [None]:
# Columns reported in the descriptive table, in display order.
var_demo = [
    'age',
    'gender',
    'bmi',
    'los_days',
    'admission_type',
    'sapsii',
    'isDeceased',
]
# Subset of var_demo to be treated as categorical.
categorical = ['gender', 'admission_type', 'isDeceased']
# pandas copy of the polars frame.
demo_pandas = demo.to_pandas()

In [None]:
# Build the descriptive table with p-values and missing counts switched off,
# then print it in LaTeX format.
mytable = tableone.TableOne(
    demo_pandas,
    columns=var_demo,
    categorical=categorical,
    pval=False,
    missing=False,
)
print(
    mytable.tabulate(tablefmt="latex")
)

In [None]:
# Distinct unitLabel values in the demographics frame.
demo.get_column('unitLabel').unique()

# Time Series Data

In [None]:
# Column name -> human-readable label used on plot axes.
data_features = dict(
    heart_rate="Heart rate",
    spo2="SpO2",
    fr="Respiratory rate",
    pam="Mean Blood Pressure",
)

In [None]:
# Restrict the data to the columns named in data_features, in that order.
data_df = data.select(list(data_features))

In [None]:
# Summary statistics of the selected columns, printed as a LaTeX table.
summary_stats = data_df.describe()
print(summary_stats.to_pandas().to_latex())

## Distribution

In [None]:
import statsmodels.api as sm
import pylab as py

In [None]:
# For each feature: a density histogram (bin width 1) followed by a Q-Q plot
# against statsmodels' default reference distribution.
# Every figure is drawn on its own explicit Axes and labelled with the
# feature's display name, so the Q-Q plots can be attributed to a variable
# and nothing depends on pyplot's implicit "current figure".
for k, v in data_features.items():
    _, hist_ax = plt.subplots()
    sns.histplot(data, x=k, binwidth=1, stat="density", ax=hist_ax)
    hist_ax.set_xlabel(v)
    hist_ax.set_ylabel('')
    plt.show()

    _, qq_ax = plt.subplots()
    sm.qqplot(data[k], ax=qq_ax)
    qq_ax.set_title(v)
    plt.show()

In [None]:
# Pairwise correlation (pandas' default method) between the feature columns.
# The columns are taken from data_features rather than a second hard-coded
# list, so this table cannot drift from the variables used elsewhere in the
# notebook. An explicit select() also avoids tuple indexing on the frame,
# which polars treats as a (row, column) lookup when exactly two keys are given.
correlation_table = data.select(list(data_features)).to_pandas().corr()

print(correlation_table.to_latex())