In [None]:
import polars as pl
from datetime import datetime
import json
import seaborn as sns

In [None]:
# Load the shared project parameters from the JSON file one level above the
# notebook. The encoding is passed explicitly so the file is decoded the same
# way on every platform instead of depending on the default locale.
with open('../params.json', 'r', encoding='utf-8') as file :
    params = json.load(file)

# Values read from the 'dataset', 'version' and 'data_folder' keys; the last
# two are used to build every data path in this notebook.
DATASET, VERSION = params['dataset'], params['version']
DATA_FOLD = params['data_folder']

In [None]:
# Read `first_48h_with_static.parquet` from the imputation_48 analysis folder of the CHU site.
first_48h_data = pl.read_parquet(f'{DATA_FOLD}/{VERSION}/3.analysis/imputation_48/chu/first_48h_with_static.parquet')

In [None]:
# Read the cleaned static encounter table (`clean_static_encounters.parquet`) of the CHU site.
demo_data = pl.read_parquet(f'{DATA_FOLD}/{VERSION}/2.clean_data/chu/static/clean_static_encounters.parquet')

In [None]:
# Read the raw `pmsi_2024.xlsx` workbook from the extended_demography folder.
pmsi_data = pl.read_excel(f'{DATA_FOLD}/{VERSION}/1.raw_data/chu/extended_demography/pmsi_2024.xlsx')

In [None]:
# Distinct encounter ids present in the 48h dataset, as a plain Python list
# (used below for `is_in` filters; the order of the ids is not meaningful).
id_list = first_48h_data['encounterId'].unique().to_list()

In [None]:
# Export a random sample of stays to an Excel workbook.
#
# Candidates are the stays of `demo_data` that are present in the 48h dataset
# (`id_list`) and whose `utcInTime` is after 2023-12-01.
#
# WARNING: the exported sheet contains directly identifying fields (last name,
# first name, date of birth) - handle the output file accordingly.
#
# Changes compared with the previous version of this cell:
#   * the output path is built from DATA_FOLD / VERSION like every other path
#     in the notebook instead of a hard-coded absolute path
#     (NOTE(review): assumes DATA_FOLD/VERSION resolve to the former
#     '/data2/poette.m/dypo/v3' location - confirm);
#   * the sample is seeded, so re-running the cell rewrites the same sample;
#   * the sample size is capped by the number of candidates, so the cell no
#     longer raises when fewer than SAMPLE_SIZE stays qualify;
#   * rows are sorted by admission date AFTER sampling (`sample` does not
#     guarantee row order), and the former sort on 'los' - which was
#     immediately overridden - has been removed.
SAMPLE_SIZE = 100
SAMPLE_SEED = 42

sample_candidates = (
    demo_data
    .filter(pl.col('encounterId').is_in(id_list))
    .filter(pl.col('utcInTime') > datetime(2023, 12, 1))
    .select(
        pl.col('encounterId').alias('ICCA_stay_id'),
        pl.col('encounterNumber').alias('IEP'),
        pl.col('lifeTimeNumber').alias('IPP'),
        pl.col('lastName').alias('Nom'),
        pl.col('firstName').alias('Prenom'),
        pl.col('dateOfBirth').alias('Date_de_naissance'),
        pl.col('utcInTime').alias('Date_entree'),
        pl.col('motif_adm').alias('Motif_admission'),
        pl.col('conclusion').alias('Conclusion'),
        # NOTE(review): header looks like a truncated 'Décès'; left unchanged
        # because files already produced may rely on this exact name.
        pl.col('isDeceased').alias('Décè'),
    )
)

(
    sample_candidates
    .sample(n=min(SAMPLE_SIZE, len(sample_candidates)), seed=SAMPLE_SEED)
    .sort('Date_entree')
    .write_excel(f'{DATA_FOLD}/{VERSION}/2.clean_data/chu/pmsi_2024/patients_2024_sample_100.xlsx')
)

In [None]:
# One row per encounterNumber: rows are ordered by encounterNumber (ascending)
# then utcOutTime (descending), and the first row of each encounterNumber is kept.
demo_unique = (
    demo_data
    .sort(by=['encounterNumber', 'utcOutTime'], descending=[False, True])
    .unique(subset='encounterNumber', keep='first')
)

In [None]:
# Display the PMSI table as loaded from the Excel workbook.
pmsi_data

# Comparaison décès

In [None]:
# Columns removed from the PMSI table: the act list, the unnamed spreadsheet
# columns numbered 26 to 36, and the two free-text fields.
pmsi_unused_cols = (
    ['Liste des actes']
    + [f'__UNNAMED__{idx}' for idx in range(26, 37)]
    + ['Motif_admission', 'Conclusion']
)

# One PMSI row per IEP: rows are ordered by IEP (ascending) then DtSortie
# (descending) and the first row of each IEP is kept. IEP is then renamed to
# `encounterNumber` and cast to string so it can be joined with the
# demographic table.
pmsi_unique = (
    pmsi_data
    .sort(by=['IEP', 'DtSortie'], descending=[False, True])
    .unique(subset='IEP', keep='first')
    .rename({'IEP': 'encounterNumber'})
    .cast({'encounterNumber': pl.String})
    .drop(pmsi_unused_cols)
)

In [None]:
# Inner join of the deduplicated demographic and PMSI tables on encounterNumber:
# only stays present in both tables are kept.
demo_pmsi = demo_unique.join(pmsi_unique, on='encounterNumber', how='inner')

In [None]:
# (rows, columns) of the deduplicated PMSI table.
pmsi_unique.shape

In [None]:
# Death recorded on the PMSI side: discharge mode (ModSortie) equal to 9.
pmsi_death = pl.col('ModSortie') == 9

# Stays flagged as deceased in at least one of the two sources.
#   mismatch  : True when exactly one of the two sources reports a death.
#   date_diff : PMSI discharge date minus utcOutTime (cast to a date), in whole days.
df_deces = (
    demo_pmsi
    .select(
        [
            'encounterNumber',
            'isDeceased',
            'ModSortie',
            'Libellé ModSortie',
            'DtSortie',
            'utcOutTime',
        ]
    )
    .filter((pl.col('isDeceased') == True) | pmsi_death)
    .cast({'utcOutTime': pl.Date})
    .with_columns(
        mismatch=pl.col('isDeceased').xor(pmsi_death),
        date_diff=(pl.col('DtSortie') - pl.col('utcOutTime')).dt.total_days(),
    )
)

In [None]:
# Rows of df_deces whose date_diff is at most 1 day (negative differences are included;
# rows with a null date_diff are excluded by the filter).
sortie_icca = df_deces.filter(pl.col('date_diff') <= 1)

In [None]:
# Mismatching rows where the demographic table reports a death
# (i.e. isDeceased is True while ModSortie == 9 is False).
sortie_icca.filter(
    (pl.col('mismatch') == True) & (pl.col('isDeceased') == True)
)

In [None]:
# Rows of df_deces whose date_diff is strictly greater than 1 day,
# followed by summary statistics of that difference.
sortie_hosp = df_deces.filter(pl.col('date_diff') > 1)
sortie_hosp['date_diff'].describe()

# Comparaison diagnostics

In [None]:
# Columns removed from the PMSI table: the act list, the unnamed spreadsheet
# columns numbered 26 to 36, and the two free-text fields.
rum_dropped_cols = [
    'Liste des actes',
    *(f'__UNNAMED__{idx}' for idx in range(26, 37)),
    'Motif_admission',
    'Conclusion',
]

# PMSI rows whose 'RUM Initial' value is 'oui', with IEP renamed to
# `encounterNumber` and cast to string for the join with the demographic table.
pmsi_rum = (
    pmsi_data
    .filter(pl.col('RUM Initial') == 'oui')
    .rename({'IEP': 'encounterNumber'})
    .cast({'encounterNumber': pl.String})
    .drop(rum_dropped_cols)
)

In [None]:
# (rows, columns) of the PMSI table restricted to rows with 'RUM Initial' == 'oui'.
pmsi_rum.shape

In [None]:
# Columns kept to compare the admission information of the demographic table
# with the diagnoses and acts recorded in the PMSI table.
dp_comparison_cols = [
    'encounterNumber',
    'utcInTime',
    'DtEntree',
    'DtSortie',
    'motif_adm',
    'conclusion',
    'admission_type',
    'Libellé ModEntree',
    'DP',
    'Libellé DP',
    'DR',
    'DAS',
    'Liste des actes (compilés)',
]

# Stays present in both tables (inner join on encounterNumber), restricted to
# the comparison columns.
demo_pmsi_dp = (
    demo_unique
    .join(pmsi_rum, on='encounterNumber', how='inner')
    .select(dp_comparison_cols)
)
    


# Jointure dataset 48h

In [None]:
# List the column names of the PMSI table.
pmsi_data.columns

In [None]:
# One row per ICCA stay: rows are ordered by ICCA_stay_id (ascending) then
# DtSortie (descending) and the first row of each stay is kept. Only the
# discharge date and a death flag (ModSortie == 9) are retained.
pmsi_deces_48 = (
    pmsi_data
    .sort(by=['ICCA_stay_id', 'DtSortie'], descending=[False, True])
    .unique(subset='ICCA_stay_id', keep='first')
    .select(
        'ICCA_stay_id',
        'DtSortie',
        (pl.col('ModSortie') == 9).alias('deces_pmsi'),
    )
)
# One row per ICCA stay carrying its DP code and label: rows are ordered by
# ICCA_stay_id (ascending), DtEntree (ascending) and 'RUM Initial'
# (descending), and the first row of each stay is kept.
# NOTE(review): the descending sort on 'RUM Initial' presumably puts 'oui'
# before other values when entry dates tie - confirm.
#
# The former `with_columns(deces_pmsi=...)` step was removed: the column was
# discarded by the `select` that immediately followed it.
pmsi_dp_48 = (
    pmsi_data
    .sort(['ICCA_stay_id', 'DtEntree', 'RUM Initial'], descending=[False, False, True])
    .unique('ICCA_stay_id', keep='first')
    .select(
        'ICCA_stay_id',
        'DP',
        'Libellé DP',
    )
    )


# Static columns of the demographic table attached to each PMSI stay.
demo_static_48 = demo_data.select(
    'encounterId', 'utcInTime', 'utcOutTime', 'gender', 'age', 'admission_type'
)

# PMSI summary per stay: death flag and discharge date joined with the DP,
# keyed by `encounterId` (ICCA_stay_id renamed and cast to string), then
# joined with the demographic columns.
#   icu_los  : utcOutTime minus utcInTime, in whole days (both cast to dates).
#   hosp_los : DtSortie minus utcInTime, in whole days.
#   cat_dp   : first character of the DP code.
pmsi_48 = (
    pmsi_deces_48
    .join(pmsi_dp_48, on='ICCA_stay_id', how='inner')
    .rename({'ICCA_stay_id': 'encounterId'})
    .cast({'encounterId': pl.String})
    .join(demo_static_48, on='encounterId', how='inner')
    .cast({'utcInTime': pl.Date, 'utcOutTime': pl.Date})
    .with_columns(
        icu_los=(pl.col('utcOutTime') - pl.col('utcInTime')).dt.total_days(),
        hosp_los=(pl.col('DtSortie') - pl.col('utcInTime')).dt.total_days(),
        cat_dp=pl.col('DP').str.slice(0, 1),
    )
)

# Sanity check displayed as the cell output: True when the joins kept exactly
# one row per PMSI stay.
len(pmsi_48) == len(pmsi_deces_48)

In [None]:
# Persist the per-stay PMSI table next to the cleaned static data.
pmsi_48.write_parquet(f'{DATA_FOLD}/{VERSION}/2.clean_data/chu/static/pmsi_48.parquet')

In [None]:
# Number of stays per DP category (first character of the DP code),
# shown from the least to the most frequent.
cat_dp_counts = pmsi_48['cat_dp'].value_counts()
cat_dp_counts.sort('count')

In [None]:
# Display the 48h dataset joined with the PMSI table on encounterId (inner join; result is not stored).
first_48h_data.join(pmsi_48, on='encounterId', how='inner')

In [None]:
# Death-related columns of the joined demographic/PMSI table, restricted to
# the stays present in the 48h dataset; utcOutTime is reduced to a date.
df_deces_48h = (
    demo_pmsi
    .filter(pl.col('encounterId').is_in(id_list))
    .select(
        [
            'encounterId',
            'isDeceased',
            'ModSortie',
            'Libellé ModSortie',
            'DtSortie',
            'utcOutTime',
        ]
    )
    .cast({'utcOutTime': pl.Date})
)

In [None]:
# Display the death-comparison table restricted to the 48h dataset.
df_deces_48h

In [None]:
# Display (without storing) one PMSI row per stay of the 48h dataset, among
# rows whose 'RUM Initial' value is 'oui': rows are ordered by ICCA_stay_id
# (ascending) then DtEntree (descending), the first row of each stay is kept,
# and a `deces` flag (ModSortie == 9) is added.
# NOTE(review): this keeps the row with the latest DtEntree, whereas
# `pmsi_dp_48` sorts DtEntree ascending - confirm which one is intended.
in_48h_dataset = pl.col('ICCA_stay_id').is_in(id_list)
is_initial_rum = pl.col('RUM Initial') == 'oui'

(
    pmsi_data
    .cast({'ICCA_stay_id': pl.String})
    .filter(in_48h_dataset & is_initial_rum)
    .sort(by=['ICCA_stay_id', 'DtEntree'], descending=[False, True])
    .unique(subset='ICCA_stay_id', keep='first')
    .with_columns(deces=pl.col('ModSortie') == 9)
)