### AP-HP Data exploration

Dans ce notebook, nous analysons les données issues du GH Paris Saclay afin de faire remonter d'éventuelles erreurs ou incohérences.

In [233]:
import pandas as pd

# Turn off pandas' chained-assignment check (no warning when assigning on a slice)
pd.set_option('mode.chained_assignment', None)

In [234]:
# Constants: lookup tables describing the hospitals

# Numeric hospital code -> three-letter hospital trigram
# (used further down to translate the Sirius hospital code)
code_hospital = {
    9: "BRK",
    10: "BCT",
    14: "APR",
    28: "ABC",
    68: "RPC",
    79: "SPR",
    96: "PBR",
}

# Three-letter hospital trigram -> full hospital name
hospital_name = {
    "BRK": "BERCK",
    "BCT": "BICETRE",
    "PBR": "PAUL-BROUSSE",
    "ABC": "ANTOINE BECLERE",
    "SPR": "SAINTE PERINE",
    "APR": "AMBROISE PARE",
    "RPC": "RAYMOND POINCARE",
}


In [235]:
# Load the raw extracts: three Excel workbooks, then two semicolon-separated CSV files

orbis, pacs, glims = (
    pd.read_excel(workbook)
    for workbook in (
        'csv/excel/orbis.xlsx',
        'csv/excel/pacs.xlsx',
        'csv/excel/glims.xlsx',
    )
)
capacitaire = pd.read_csv('csv/capacitaire.csv', sep=';')
sirius = pd.read_csv('csv/sirius.csv', sep=';')

## Data Cleaning

In [236]:
# Orbis - keep the useful columns, rename them, then derive the room code and
# the hospital trigram before reducing the frame to its three final columns

orbis = (
    orbis[['IPP', 'Chambre', 'U.Responsabilité']]
    .rename(columns={'IPP': 'ipp', 'Chambre': 'chambre'})
    .assign(
        # Room code = text before the first " - "
        # (e.g: C134 from C134 - CHAMBRE SEULE C134)
        code_chambre=lambda frame: (
            frame['chambre'].str.split(r"\ - ", expand=True)[0]
        ),
        # Hospital trigram = first three characters of the second " - " segment
        # (e.g: ABC from 028081 - ABC OBSTETRIQUE (UF))
        hospital_name=lambda frame: (
            frame['U.Responsabilité'].str.split(r"\ - ", expand=True)[1].str[0:3]
        ),
    )
    .loc[:, ['ipp', 'code_chambre', 'hospital_name']]
)

In [237]:
# Glims - keep ipp / is_pcr, one row per ipp, and encode a positive PCR as 1
#
# drop_duplicates keeps the FIRST row found for each ipp, so the retained
# result depends on the row order of the extract.
# Any is_pcr value other than 'Positif' becomes NaN after the mapping.

glims = (
    glims[['ipp', 'is_pcr']]
    .drop_duplicates('ipp')
    .assign(is_pcr=lambda frame: frame['is_pcr'].map({'Positif': 1}))
)

In [238]:
# Pacs - keep the ipp and radio columns, then only the first row seen for each ipp
pacs = pacs.loc[:, ['ipp', 'radio']].drop_duplicates(subset='ipp')

In [239]:
# Capacitaire - rename three columns so they line up with the names used
# by the other tables (hospital_name / covid_service are merge keys below)
capacitaire = capacitaire.rename(
    columns={
        'Full COVID 1/0': 'is_covid_dedicated',
        'hopital': 'hospital_name',
        'service_covid': 'covid_service',
    }
)

In [240]:
# Sirius - select four columns, rename them, keep only the rows flagged 'OUI'
# and finally discard the flag column itself
sirius = (
    sirius[['Hopital', 'Intitulé Site Crise COVID', 'Retenir ligne O/N', 'Code Chambre']]
    .rename(
        columns={
            'Hopital': 'code_hospital',
            'Intitulé Site Crise COVID': 'covid_service',
            'Retenir ligne O/N': 'filter_row',
            'Code Chambre': 'code_chambre',
        }
    )
    .query("filter_row=='OUI'")
    .drop(columns='filter_row')
)

## Merge

In [241]:
# Enrich Orbis with the Glims PCR flag, then with the Pacs radio flag.
# Both are left joins on ipp, so every Orbis row is kept.
orbis = (
    orbis
    .merge(glims, on='ipp', how='left')
    .merge(pacs, on='ipp', how='left')
    # fillna(0) applies to every column: besides is_pcr / radio, a missing
    # code_chambre or hospital_name also becomes 0.
    # NOTE(review): confirm that filling the text columns is intended.
    .fillna(0)
)

In [242]:
# Compute is_covid patient

def is_covid(x):
    """Return 1 if the row carries a Covid signal, otherwise 0.

    ``x`` is a row-like object indexable by ``'is_pcr'`` and ``'radio'``.
    The row is flagged when the sum of those two values is strictly
    positive; a sum that is not greater than zero (including NaN) gives 0.
    """
    return 1 if (x['is_pcr'] + x['radio']) > 0 else 0

# Evaluate is_covid row by row and store the 0/1 result as a new column
orbis = orbis.assign(is_covid=orbis.apply(is_covid, axis=1))

In [243]:
# How many Orbis rows have an empty room code, and how many of those are Covid positive?
patients_no_room = orbis.query("code_chambre == ''")
patients_no_room_covid = patients_no_room.query("is_covid==1")

message = ('il y a {n_patients_no_room} patients sans'
           ' Chambre dans Orbis dont {n_patients_no_room_covid} Covid positif:')
print(message.format(n_patients_no_room=len(patients_no_room),
                     n_patients_no_room_covid=len(patients_no_room_covid)))

# Display the Covid-positive rows that have no room code
patients_no_room_covid

il y a 100 patients sans Chambre dans Orbis dont 9 Covid positif:


Unnamed: 0,ipp,code_chambre,hospital_name,is_pcr,radio,is_covid
124,8003232267,,ABC,1.0,0.0,1
389,8014201282,,APR,1.0,0.0,1
413,8001043127,,APR,1.0,0.0,1
898,8014207211,,BCT,1.0,0.0,1
923,8014213145,,BCT,1.0,0.0,1
1098,8001598842,,BCT,1.0,0.0,1
1213,8014228263,,BCT,0.0,1.0,1
1281,8003471485,,BCT,1.0,1.0,1
1347,8008818941,,BCT,1.0,1.0,1


In [244]:
# Add the hospital trigram to Sirius by translating its numeric hospital code;
# codes absent from code_hospital give a missing hospital_name
sirius = sirius.assign(hospital_name=sirius['code_hospital'].map(code_hospital))

In [245]:
# Attach the Sirius information to each Orbis row, matching on hospital and room code.
# NOTE(review): a left merge repeats an Orbis row once per matching Sirius row -
# confirm that (hospital_name, code_chambre) is unique in Sirius.
orbis = orbis.merge(
    sirius,
    how='left',
    on=['hospital_name', 'code_chambre'],
)

In [246]:
# Attach the Capacitaire columns, matching on hospital and Covid service
# (left join: Orbis rows without a match keep missing values)
orbis = orbis.merge(
    capacitaire,
    how='left',
    on=['hospital_name', 'covid_service'],
)

In [247]:
# Summary per (hospital, Covid service):
#   ipp                -> number of rows with a non-missing ipp
#   is_covid           -> number of rows flagged Covid (sum of the 0/1 flag)
#   lits_ouverts       -> largest value found in the group
#   lits_ouverts_covid -> largest value found in the group
df = (
    orbis
    .groupby(['hospital_name', 'covid_service'])
    .agg({
        'ipp': 'count',
        'is_covid': 'sum',
        'lits_ouverts': 'max',
        'lits_ouverts_covid': 'max',
    })
    .reset_index()
)

In [252]:
# What's up in Berck? Show the Covid-flagged rows whose hospital is BRK
orbis[(orbis['hospital_name'] == 'BRK') & (orbis['is_covid'] == 1)]

Unnamed: 0,ipp,code_chambre,hospital_name,is_pcr,radio,is_covid,code_hospital,covid_service,lits_ouverts,lits_ouverts_covid,is_covid_dedicated
847,8013904331,M212,BRK,1.0,0.0,1,9.0,SSR MENARD 2 - COVID +,,,
