# AP-HP Data exploration

Dans ce notebook, nous analysons les données issues du GH Paris Saclay pour remonter de possibles erreurs ou incohérences dans les données.

In [420]:
import pandas as pd

# Silence pandas' chained-assignment (SettingWithCopy) warning: later cells
# add columns to frames obtained by column selection or filtering.
pd.set_option('mode.chained_assignment', None)

In [421]:
# Constants

# Numeric hospital code -> three-letter site trigram.
# Used to translate the Sirius 'Hopital' column into a trigram.
code_hospital = dict([
    (9, 'BRK'),
    (10, 'BCT'),
    (14, 'APR'),
    (28, 'ABC'),
    (68, 'RPC'),
    (79, 'SPR'),
    (96, 'PBR'),
])

# Site trigram -> full hospital name.
hospital_name = dict(
    BRK='BERCK',
    BCT='BICETRE',
    PBR='PAUL-BROUSSE',
    ABC='ANTOINE BECLERE',
    SPR='SAINTE PERINE',
    APR='AMBROISE PARE',
    RPC='RAYMOND POINCARE',
)


In [422]:
# Load the raw extracts.

# Orbis is a semicolon-separated CSV file encoded in cp1252.
orbis = pd.read_csv('data/orbis.csv', encoding='cp1252', sep=';')

# The four other sources are Excel workbooks, read with pandas' defaults.
pacs = pd.read_excel('data/pacs.xlsx')
glims = pd.read_excel('data/glims.xlsx')
capacitaire = pd.read_excel('data/capacitaire.xlsx')
sirius = pd.read_excel('data/sirius.xlsx')

## Data Cleaning

In [423]:
# Orbis - keep the relevant columns, rename them, then derive the room code
# and the hospital trigram from the free-text columns.
orbis = (
    orbis[['IPP', 'Chambre', 'U.Responsabilité']]
    .rename(columns={'IPP': 'ipp', 'Chambre': 'chambre'})
    .assign(
        # Room code = text before the first " - " separator
        # (e.g: C134 from C134 - CHAMBRE SEULE C134)
        code_chambre=lambda frame: (
            frame['chambre'].str.split(r"\ - ", expand=True)[0]
        ),
        # Hospital trigram = first three characters of the text that follows
        # the first " - " (e.g: ABC from 028081 - ABC OBSTETRIQUE (UF))
        hospital_name=lambda frame: (
            frame['U.Responsabilité'].str.split(r"\ - ", expand=True)[1].str[0:3]
        ),
    )
    [['ipp', 'code_chambre', 'hospital_name']]
)

In [424]:
# Glims - keep one row per patient and encode the PCR result as a flag.
# NOTE(review): drop_duplicates keeps only the first row found for each ipp,
# so any other result recorded for the same patient is ignored — confirm that
# this is the intended rule.
glims = glims[['ipp', 'is_pcr']].drop_duplicates('ipp')

# 'Positif' becomes 1; every other value becomes NaN.
glims = glims.assign(is_pcr=glims['is_pcr'].map({'Positif': 1}))

In [425]:
# Pacs - keep the patient id and the radio flag, one row per patient.
# NOTE(review): only the first row of each ipp is kept — confirm that later
# rows for the same patient can safely be ignored.
pacs = pacs[['ipp', 'radio']].drop_duplicates('ipp')

In [426]:
# Capacitaire - harmonise the column names with the other sources, then keep
# a single row (the first one) per (hospital_name, covid_service) pair.
capacitaire = (
    capacitaire
    .rename(columns={
        'Full COVID 1/0': 'is_covid_dedicated',
        'hopital': 'hospital_name',
        'service_covid': 'covid_service',
    })
    .drop_duplicates(['hospital_name', 'covid_service'])
)

In [427]:
# Sirius - select the useful columns, rename them, keep only the rows flagged
# 'OUI' in 'Retenir ligne O/N', then drop that flag column.
sirius = (
    sirius[['Hopital',
            'Intitulé Site Crise COVID',
            'Retenir ligne O/N',
            'Code Chambre']]
    .rename(columns={
        'Hopital': 'code_hospital',
        'Intitulé Site Crise COVID': 'covid_service',
        'Retenir ligne O/N': 'filter_row',
        'Code Chambre': 'code_chambre',
    })
    .query("filter_row=='OUI'")
    .drop('filter_row', axis=1)
)

## Merge

In [428]:
# Attach the PCR flag (Glims) and the radio flag (Pacs) to every Orbis row.
# Left joins keep all Orbis rows; patients missing from a source get NaN,
# which is then replaced by 0.
# NOTE(review): fillna(0) is applied to every column of the frame, not only
# to is_pcr / radio — confirm that this is intended for the text columns.
orbis = (
    orbis
    .merge(glims, on='ipp', how='left')
    .merge(pacs, on='ipp', how='left')
    .fillna(0)
)

In [429]:
# Compute is_covid patient

def is_covid(x):
    """Return 1 when the row counts as a Covid case, 0 otherwise.

    ``x`` must support ``x['is_pcr']`` and ``x['radio']`` (e.g. a DataFrame
    row). The row is flagged when the sum of both values is strictly
    positive.
    """
    positive_signals = x['is_pcr'] + x['radio']
    return 1 if positive_signals > 0 else 0

# Vectorised equivalent of applying is_covid to every row: 1 when
# is_pcr + radio is strictly positive, 0 otherwise. Column arithmetic avoids
# the per-row Python call made by DataFrame.apply(axis=1).
orbis['is_covid'] = ((orbis['is_pcr'] + orbis['radio']) > 0).astype('int64')

In [430]:
# How many patients have no room code in Orbis, and how many of them are
# flagged Covid? Each subset is computed once and reused for the counts and
# for the displayed table.
patients_no_room = orbis.query("code_chambre == ''")
covid_patients_no_room = patients_no_room.query("is_covid==1")

print(
    ('il y a {n_patients_no_room} patients sans'
     ' Chambre dans Orbis dont {n_patients_no_room_covid} Covid positif:')
    .format(n_patients_no_room=len(patients_no_room),
            n_patients_no_room_covid=len(covid_patients_no_room))
)

# Last expression of the cell: display the Covid patients without a room.
covid_patients_no_room

il y a 98 patients sans Chambre dans Orbis dont 8 Covid positif:


Unnamed: 0,ipp,code_chambre,hospital_name,is_pcr,radio,is_covid
118,8003232267,,ABC,1.0,0.0,1
381,8014201282,,APR,1.0,0.0,1
402,8001043127,,APR,1.0,0.0,1
760,8014207211,,BCT,1.0,0.0,1
780,8014213145,,BCT,1.0,0.0,1
821,8008578325,,BCT,1.0,0.0,1
1182,8014232794,,BCT,1.0,0.0,1
1301,8008890155,,BCT,1.0,0.0,1


In [431]:
# Add the hospital trigram to Sirius by translating its numeric hospital code
# through the code_hospital mapping (codes missing from the mapping give NaN).
sirius = sirius.assign(hospital_name=sirius['code_hospital'].map(code_hospital))

In [432]:
# Attach the Sirius information (including covid_service) to each Orbis row,
# matching on hospital trigram and room code. The left join keeps Orbis rows
# that have no match in Sirius.
# NOTE(review): Sirius is not de-duplicated on these keys in the cells above;
# a repeated (hospital_name, code_chambre) pair would repeat the matching
# patient rows — confirm the keys are unique.
orbis = orbis.merge(sirius,
                    on=['hospital_name', 'code_chambre'],
                    how='left')

In [433]:
# Attach the capacity figures to each row, matching on hospital trigram and
# covid service. The left join keeps rows without a capacity entry.
orbis = orbis.merge(capacitaire,
                    on=['hospital_name', 'covid_service'],
                    how='left')

In [434]:
# One row per (hospital, covid service):
#   - ipp                : number of distinct patient ids
#   - is_covid           : number of rows flagged Covid
#   - lits_ouverts       : largest value found in the group
#   - lits_ouverts_covid : largest value found in the group
# Rows with a missing grouping key are left out by groupby; NaN left in the
# aggregated table is replaced by 0.
df = (
    orbis
    .groupby(['hospital_name', 'covid_service'])
    .agg({
        'ipp': 'nunique',
        'is_covid': 'sum',
        'lits_ouverts': 'max',
        'lits_ouverts_covid': 'max',
    })
    .reset_index()
    .fillna(0)
)

In [435]:
# Preview the first five rows of the per-service summary.
df.head(n=5)

Unnamed: 0,hospital_name,covid_service,ipp,is_covid,lits_ouverts,lits_ouverts_covid
0,ABC,CHIR DIG/GYNECO/ORTHO MUTUALISE,5,0,8,0.0
1,ABC,COVID 12 (REA/SC),9,6,8,8.0
2,ABC,COVID 14,24,7,14,14.0
3,ABC,COVID 30,27,19,30,30.0
4,ABC,HGE,24,0,27,0.0
