In [None]:
import json
import uuid

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Read the shared run configuration stored one directory above the notebook.
with open('../params.json', 'r') as file:
    params = json.load(file)

# Expose the three settings used to build every data path below.
DATASET = params['dataset']
VERSION = params['version']
DATA_FOLD = params['data_folder']

In [None]:
# Locations of the cleaned tables (parquet) and the raw antibiotic sheets (Excel).
static_path = f'{DATA_FOLD}/{VERSION}/2.clean_data/{DATASET}/static/clean_static_encounters.parquet'
temporal_path = f'{DATA_FOLD}/{VERSION}/2.clean_data/{DATASET}/temporal/treated_all.parquet'
mask_path = f'{DATA_FOLD}/{VERSION}/2.clean_data/{DATASET}/temporal/missing_matrix.parquet'
atb_path = f'{DATA_FOLD}/{VERSION}/1.raw_data/{DATASET}/other/all_atb.xlsx'
atb_label_path = f'{DATA_FOLD}/{VERSION}/1.raw_data/{DATASET}/other/atb_label.xlsx'

# Load everything up front so the later cells only transform in-memory frames.
static = pd.read_parquet(static_path)
temporal = pd.read_parquet(temporal_path)
mask = pd.read_parquet(mask_path)
atb = pd.read_excel(atb_path)
atb_label = pd.read_excel(atb_label_path)


In [None]:
# List the columns available in the `static` frame.
static.columns

In [None]:
# List the columns available in the `temporal` frame.
temporal.columns

## EDS Demo Sepsis

### Select ids

In [None]:
# Keep only the antibiotic labels that have a class assigned, restricted to
# the three columns needed to map a short label to its class.
has_class = atb_label['class'].notna()
label_map = atb_label.loc[has_class, ['shortLabel', 'label', 'class']]

In [None]:
# An encounter is kept when its admission reason mentions sepsis, or when its
# conclusion mentions sepsis or an infection. Missing text counts as no match.
# NOTE(review): matching is case-sensitive — confirm the text columns are lower-cased upstream.
adm_match = static['motif_adm'].str.contains('sepsis|septique', regex=True, na=False)
conclusion_match = static['conclusion'].str.contains('sepsis|septique|infection', regex=True, na=False)

# Most recent admissions first.
sepsis_df = static[adm_match | conclusion_match].sort_values(by='utcInTime', ascending=False)

# Distinct encounter ids of the cohort, as plain Python ints.
sepsis_pop = sepsis_df['encounterId'].astype('int32').unique().tolist()

In [None]:
# Number of distinct encounters selected for the cohort.
len(sepsis_pop)

In [None]:
# Temporal rows of the cohort with delta_hour in [0, 120] (both bounds included).
# NOTE(review): delta_hour is presumably hours since admission — confirm.
in_cohort = temporal['encounterId'].isin(sepsis_pop)
in_window = temporal['delta_hour'].between(0, 120)
sepsis_temp = temporal.loc[in_cohort & in_window]

# Antibiotic rows of the cohort that carry a dose_24 value, enriched with the
# label/class mapping; the inner join drops short labels without a class.
atb_rows = atb['encounterId'].isin(sepsis_pop) & atb['dose_24'].notna()
atb_adm = atb.loc[atb_rows].merge(label_map, how='inner', on='shortLabel')

In [None]:
# Distinct encounters with at least one antibiotic row of class 'aminoside'.
is_aminoside = atb_adm['class'].eq('aminoside')
has_aminoside = atb_adm.loc[is_aminoside, 'encounterId'].unique().tolist()

In [None]:
# Number of cohort encounters that have at least one temporal row in the window.
sepsis_temp.encounterId.nunique()

In [None]:
# `sepsis_temp` is a `.loc` slice of `temporal`, so assigning a column or calling
# `dropna(inplace=True)` on it raises SettingWithCopyWarning (pandas < 3).
# Build a new frame instead; the cell is also safe to re-run.
#   1. A missing nad_dose_poids becomes 0
#      (presumably "no infusion running" — TODO confirm the clinical meaning).
#   2. Rows lacking either lactate or pam are discarded.
sepsis_temp = (
    sepsis_temp
    .assign(nad_dose_poids=lambda frame: frame['nad_dose_poids'].fillna(0))
    .dropna(subset=['lactate', 'pam'])
)

In [None]:
# Variables summarised by both their maximum and minimum per encounter.
minmax_cols = [
    'pad', 'pam', 'spo2', 'temp', 'fr', 'heart_rate', 'nad_dose_poids',
    'glyc_cap', 'tp', 'creat', 'bili_tot', 'num_plq', 'leucocytes',
    'lactate', 'hemoglobine',
]

# Per-column statistics; key order fixes the order of the output columns.
agg_spec = {'pas': 'min'}
agg_spec.update({col: ['max', 'min'] for col in minmax_cols})
agg_spec['urine_output'] = 'mean'
agg_spec['iv_input'] = ['max']

# One row per encounter. Because some entries are lists, the result has
# two-level (variable, statistic) column labels.
sepsis_agg = (
    sepsis_temp[['encounterId', *agg_spec]]
    .groupby('encounterId', as_index=False)
    .agg(agg_spec)
)
       

In [None]:
# Flatten the two-level (variable, statistic) header into 'variable_statistic'.
# The key column has an empty second level and therefore becomes 'encounterId_'.
# Guarded by the MultiIndex check so that re-running the cell is a no-op:
# joining names that are already flat strings would insert '_' between every
# character ('pas_min' -> 'p_a_s___m_i_n').
if isinstance(sepsis_agg.columns, pd.MultiIndex):
    sepsis_agg.columns = ['_'.join(levels) for levels in sepsis_agg.columns]

# Preview only the first rows rather than rendering the whole table.
sepsis_agg.head()

In [None]:
# Restore the plain key name: flattening left a trailing underscore on it.
sepsis_agg = sepsis_agg.rename(columns={'encounterId_': 'encounterId'})

In [None]:
# Use the same int32 key type that was used when building sepsis_pop.
sepsis_df['encounterId'] = sepsis_df['encounterId'].astype('int32')

# Static descriptors carried into the final table.
static_cols = ['encounterId', 'gender', 'age', 'poids_admission', 'taille', 'sapsii', 'los', 'deces_datediff', 'utcInTime']

# Inner join: only encounters that still have aggregated temporal data remain.
merged = pd.merge(sepsis_df[static_cols], sepsis_agg, how='inner', on='encounterId')

In [None]:
# Pseudonymise the key: every row receives its own freshly generated random
# UUID in place of the original encounter id. The lambda ignores the original
# value on purpose, so the mapping cannot be reversed from the output.
# (`uuid` is imported with the other modules in the first cell.)
merged['encounterId'] = merged['encounterId'].apply(lambda _encounter_id: uuid.uuid4())

In [None]:
# Remove the admission timestamp from the table.
# `errors='ignore'` keeps the cell re-runnable: without it a second execution
# raises KeyError because the column is already gone.
merged = merged.drop(columns=['utcInTime'], errors='ignore')


In [None]:
# Discard encounters for which no sapsii value is available.
merged = merged.dropna(subset=['sapsii'])

In [None]:
# Size of the exported demo extract and the seed that makes it reproducible.
# Without a fixed seed every run would write a different set of rows.
N_DEMO_ROWS = 2288
SAMPLE_SEED = 42

# Draws without replacement; pandas raises ValueError if `merged` has fewer
# than N_DEMO_ROWS rows, which is kept so a too-small cohort is noticed.
merged_sample = merged.sample(n=N_DEMO_ROWS, random_state=SAMPLE_SEED)

In [None]:
# Write the sampled table next to the other cleaned data, without the index.
sepsis_csv_path = f'{DATA_FOLD}/{VERSION}/2.clean_data/{DATASET}/sepsis_df.csv'
merged_sample.to_csv(sepsis_csv_path, index=False)

In [None]:
# One distinct hex UUID per row. Assigning `uuid.uuid4().hex` directly would
# evaluate it once and broadcast the same identifier to every row.
merged['encounterId_uuid'] = [uuid.uuid4().hex for _ in range(len(merged))]

In [None]:
# Preview only: generate one random UUID per row without storing the result.
merged['encounterId'].apply(lambda _encounter_id: uuid.uuid4())

In [None]:
# NOTE(review): `pivot_atb` is not assigned in any cell visible above; on a
# fresh kernel this raises NameError unless it is defined elsewhere — TODO
# confirm, or remove this cell.
pivot_atb

In [None]:
# NOTE(review): depends on `pivot_atb`, which is not assigned in any cell
# visible above — TODO confirm it is defined elsewhere, or remove this cell.
pivot_atb.columns