In [1]:
import pandas as pd
import os
import shutil
import matplotlib.pyplot as plt 

pd.set_option('display.max_columns', None)  
from thesis_lib.data_processing import *
from thesis_lib.visual import *
import warnings
warnings.filterwarnings('ignore')
from datetime import timedelta

In [2]:
db = load_parquet('parquet_data')

Loading dataset:  laboratory
	 3086085
Loading dataset:  images
	 350403
Loading dataset:  hospitalizations
	 88256
Loading dataset:  surgeries
	 66750
Loading dataset:  sectors_admissions
	 154481
Loading dataset:  hospital_sectors
	 36
Formating integer columns
Formating date columns
Formating time columns
Formating datetime columns


In [4]:
db['sectors_admissions'].sort_values(by=['patient_id','admission_id','sector_admission_date']).head()

Unnamed: 0,admission_id,patient_id,sector_admission_date,sector_admission_time,sector_code,category,sector_admission_datetime
65732,507351-7,1000021-0,2018-05-20,14:00:00,CEG,C,2018-05-20 14:00:00
65733,507351-7,1000021-0,2018-05-20,18:05:00,T17,C,2018-05-20 18:05:00
74145,512368-7,1000021-0,2018-07-18,12:00:00,UC4,I,2018-07-18 12:00:00
74232,512368-7,1000021-0,2018-07-19,13:20:00,T17,C,2018-07-19 13:20:00
145486,552570-0,1000021-0,2019-10-08,10:22:00,PQU,I,2019-10-08 10:22:00


In [98]:
from datetime import datetime
class Patient:
    def __init__(self, patient_id):
        self.patient_id = patient_id

    def load_patient_data(self,db):
        df = db['hospitalizations']
        self.hospitalizations_data = df[df['patient_id'] == self.patient_id].sort_values(by='admission_date')
        
        df = db['laboratory']
        self.laboratory_data = df[df['patient_id'] == self.patient_id].sort_values(by=['admission_date',
                                                                                      'labo_date'])
       
        df = db['images']
        self.images_data = df[df['patient_id'] == self.patient_id].sort_values(by=['admission_date',
                                                                                  'image_date'])
        
        df = db['surgeries']
        self.surgeries_data = df[df['patient_id'] == self.patient_id].sort_values(by=['admission_date',
                                                                                      'surgery_date'])
        
        
        df = db['sectors_admissions']
        self.sectors_data = df[df['patient_id'] == self.patient_id].sort_values(by=['admission_id',
                                                                                    'sector_admission_date'])
    
    @property
    def admission_history(self): 
        
        admission_key = namedtuple('admission_key', ['admission_number','admission_id'])
        
        if hasattr(self, 'hospitalizations_data'):
            return {admission_key(admission_number,admission_id) : Admission(admission_id, self) 
                   for admission_number, admission_id in enumerate(self.hospitalizations_data.admission_id)}
        
        else:
            print('Load patient data before')
    
    def get_historic_records(self):

        records_history = pd.DataFrame()
        
        for admission in self.admission_history:
            admission_records = self.admission_history[admission].get_admission_records()
            if records_history.empty:
                records_history = admission_records
            else:
                records_history  = records_history.append(admission_records, ignore_index=True)
        
        return records_history
            

In [126]:
from collections import namedtuple
class Admission():
    def __init__(self, admission_id, patient):
        self.admission_id = admission_id
        
        self.admission_data = patient.hospitalizations_data[
                                    patient.hospitalizations_data.admission_id == admission_id
                                                            ].to_dict(orient='records')[0]
        self.discharge_datetime = datetime.strptime(self.admission_data['discharge_datetime'], '%Y-%m-%d %H:%M:%S')
        
        self.laboratory_data = patient.laboratory_data[
                                    patient.laboratory_data.admission_id == admission_id
                                                        ]
        self.images_data = patient.images_data[
                                    patient.images_data.admission_id == admission_id
                                                ]
        self.surgeries_data = patient.surgeries_data[
                                    patient.surgeries_data.admission_id == admission_id
                                                ]
        
        self.sectors_data = patient.sectors_data[
                                    patient.sectors_data.admission_id == admission_id
                                                ]
        
        self.sectors_data ['sector_stay'] = self.sectors_data.sector_admission_datetime.diff(periods=1
                                                                                            ).astype('timedelta64[m]'
                                                                                            ).shift(-1)
        
        last_sector_stay = (self.discharge_datetime - self.sectors_data.iloc[-1].sector_admission_datetime)
    
        
        self.sectors_data.loc[self.sectors_data.index[-1], 'sector_stay'] = int(last_sector_stay.seconds/3600)
        
        
        
    def get_labos_date_records(self,date):
        
        labos_records = {}
        #Labos 
        mask = self.laboratory_data['labo_date'] == date
        labos = self.laboratory_data[mask]

        labos_records['labos_studies_names'] =  labos.study_description.unique()
        labos_records['labos_set_count'] = labos.labo_id.nunique()
        labos_records['labos_count'] = len(labos.labo_id)
        labos_records['labos_requester_roles'] = labos.requester_role.unique()
        labos_records['labos_requester_roles_count'] = labos.requester_role.nunique()
        labos_records['labos_emergencies'] = labos.emergency.sum()
        labos_records['labos_requesters'] = labos.requester_name.unique()
        labos_records['labos_requesters_count'] = labos.requester_name.nunique()
        
        return labos_records 
    
    def get_images_date_records(self,date):
        
        images_records = {}
        #images 
        mask = self.images_data['image_date'] == date
        images = self.images_data[mask]

        images_records['images_count'] =  images.image_id.nunique()
        images_records['images_study_types'] =  images.type_of_service.unique()
        images_records['images_study_types_count'] =  images.type_of_service.nunique()
        images_records['images_studies_names'] =  images.study_description.unique()
        
        images_records['images_requesters'] = images.requester_name.unique()
        images_records['images_requesters_count'] = images.requester_name.nunique()
        images_records['images_requester_roles'] = images.requester_role.unique()
        images_records['images_requester_roles_count'] = images.requester_role.nunique()
        images_records['images_emergencies'] = images.emergency.sum()
    
        return images_records 
    
    def get_surgeries_date_records(self,date):
        
        surgeries_records = {}
        #surgeries
        mask = self.surgeries_data['surgery_date'] == date
        surgeries = self.surgeries_data[mask]

        surgeries_records['surgeries_count'] =  len(surgeries.surgery_id.values)
        
        surgeries_records['surgeries_types'] =  surgeries.surgery_type.values
        
        surgeries_records['surgeries_types_count'] =  surgeries.surgery_type.nunique()
        
        surgeries_records['surgeries_scheduled'] =  surgeries.scheduled_surgery.values
        
        surgeries_records['surgeries_scheduled_done'] = surgeries.scheduled_surgery_done.values
                                                        
        surgeries_records['surgeries_actual'] = surgeries.actual_surgery.values
        
        surgeries_records['surgeries_pre_surgery_duration'] = surgeries.pre_surgery_duration.sum()
        surgeries_records['surgeries_surgery_duration'] = surgeries.surgery_duration.sum()
        surgeries_records['surgeries_post_surgery_duration'] = surgeries.post_surgery_duration.sum()
        surgeries_records['surgeries_prep_duration'] = surgeries.surgery_prep_duration.sum()
        surgeries_records['surgeries_surgery_delay'] = surgeries.surgery_delay.sum()
        
        surgeries_records['surgeries_injury_condition'] = surgeries.injury_condition.values
        surgeries_records['surgeries_post_surgery_condition'] = surgeries.post_surgery_condition.values
        surgeries_records['surgeries_services'] = surgeries.service_description.values
        surgeries_records['surgeries_services_count'] =  surgeries.service_description.nunique()
        
        surgeries_records['surgeries_anesthesia_types'] =  surgeries.anesthesia_type.values
        surgeries_records['surgeries_bact_positive'] =  surgeries.bact_positive.values

        return surgeries_records 
        
        
    def get_admission_records(self):
        
        length_of_stay = (self.admission_data['discharge_date']-
                          self.admission_data['admission_date']).days
        
        df = pd.DataFrame(columns=['patient_id',
                                   'admission_id', 
                                   'no._of_admission',
                                   'hosp_day_number', 
                                   'date',
                                   'discharge'])
        
        
        row = {'patient_id': patient.patient_id,
               'admission_id':self.admission_id,
               'no._of_admission': [key.admission_number for key in patient.admission_history.keys() if 
                                        key.admission_id == self.admission_id][0],
               'labos_cumulative': 0,
               'images_cumulative': 0,
               'labos_set_cumulative':0,
               'surgeries_cumulative': 0}
          
        for day in range(length_of_stay):
            row['date'] = self.admission_data['admission_date'] + timedelta(day)
            row['hosp_day_number'] =  day
            row['discharge'] =  (True if day == (length_of_stay -1) else False)
            
            #Labos 
            labos_records = self.get_labos_date_records(row['date'])
            row.update(labos_records)
            row['labos_cumulative'] += row['labos_count']
            row['labos_set_cumulative'] += row['labos_set_count']
            
            #Images
            images_records = self.get_images_date_records(row['date'])
            row.update(images_records)
            row['images_cumulative'] += row['images_count']
            
            #Surgeries
            surgeries_records = self.get_surgeries_date_records(row['date'])
            row.update(surgeries_records)
            row['surgeries_cumulative'] += row['surgeries_count']
            
            #Sectors
            
            df = df.append(row, ignore_index=True)

        return df

In [127]:
patient = Patient('3714610-6')

In [128]:
patient.load_patient_data(db)

In [129]:
patient.admission_history

{admission_key(admission_number=0, admission_id='466492-9'): <__main__.Admission at 0x1189b3ba8>}

In [130]:
patient.sectors_data

Unnamed: 0,admission_id,patient_id,sector_admission_date,sector_admission_time,sector_code,category,sector_admission_datetime
32,466492-9,3714610-6,2017-01-01,14:00:00,EME,I,2017-01-01 14:00:00
33,466492-9,3714610-6,2017-01-01,18:38:00,UC1,I,2017-01-01 18:38:00
1790,466492-9,3714610-6,2017-01-16,14:48:00,T14,C,2017-01-16 14:48:00
1932,466492-9,3714610-6,2017-01-17,12:05:00,UC1,I,2017-01-17 12:05:00
5320,466492-9,3714610-6,2017-02-14,20:50:00,UCR,I,2017-02-14 20:50:00
7597,466492-9,3714610-6,2017-03-04,14:44:00,UCO,I,2017-03-04 14:44:00
7598,466492-9,3714610-6,2017-03-04,16:09:00,UCR,I,2017-03-04 16:09:00


In [131]:
patient.admission_history[(0,'466492-9')].sectors_data

Unnamed: 0,admission_id,patient_id,sector_admission_date,sector_admission_time,sector_code,category,sector_admission_datetime,sector_stay
32,466492-9,3714610-6,2017-01-01,14:00:00,EME,I,2017-01-01 14:00:00,278.0
33,466492-9,3714610-6,2017-01-01,18:38:00,UC1,I,2017-01-01 18:38:00,21370.0
1790,466492-9,3714610-6,2017-01-16,14:48:00,T14,C,2017-01-16 14:48:00,1277.0
1932,466492-9,3714610-6,2017-01-17,12:05:00,UC1,I,2017-01-17 12:05:00,40845.0
5320,466492-9,3714610-6,2017-02-14,20:50:00,UCR,I,2017-02-14 20:50:00,25554.0
7597,466492-9,3714610-6,2017-03-04,14:44:00,UCO,I,2017-03-04 14:44:00,85.0
7598,466492-9,3714610-6,2017-03-04,16:09:00,UCR,I,2017-03-04 16:09:00,14.0


In [110]:

        
    @property       
    def gender(self):
        try:
            rv = self.hospitalizations_data['gender'].unique()[0]
        except:
            print('Load patient data before! ')
        
        return rv
            
    @property
    def insurance(self):
        try:
            rv = self.hospitalizations_data['insurance_entity'].unique()[0]
        except:
            print('Load patient data before!')
            
        return rv
    
    @property
    def birth_date(self):
        try: 
            rv = datetime.strptime(self.hospitalizations_data['birth_date'].unique()[0],
                                            '%Y-%m-%d')
        except:
            print('Load patient data before!')
            
        return rv
        
    def get_demographics(self):
        self.demographics = {'gender': self.gender ,
                             'insurance': self.insurance ,
                             'birth_date': self.birth_date}
        
        return self.demographics
        
        
    def get_admissions(self):
        
        if not hasattr(self, 'hospitalizations_data'):
            print('Load patient\'s hospitalizations_data before! ')
            
        self.admissions = {} 
        
        for i,admission_id in enumerate(self.hospitalizations_data['admission_id']):
            
            admission_data = self.hospitalizations_data[(self.hospitalizations_data['admission_id'] == 
                                                         admission_id)].to_dict('list')
            
            admission_data['n_admission'] = i
            
            admission =  Admission(admission_data)
            
            self.admissions[str(admission._admission_id)] = admission
            
    def admission_history(self, admission_id):
        
        admission = self.admissions[admission_id]
        
        length_of_stay = (admission._discharge_date-admission._admission_date).days
        
        df = pd.DataFrame(columns=['patient_id',
                                   'admission_id', 
                                   'no._of_admission',
                                   'hosp_day_number', 
                                   'date',
                                   'discharge',
                                   'no._of_labo'])
        
        for day in range(length_of_stay):
            row = {'patient_id': self.patient_id,
                   'admission_id': admission._admission_id,
                   'no._of_admission': admission._patient_n_admission,
                   'no._of_labo': 
                   'date': admission._admission_date + timedelta(day),
                   'hosp_day_number': day, 
                   'discharge': (True if day == (length_of_stay -1) else False)
                   
            }
            df = df.append(row, ignore_index=True)
            #pd.concat([df,row], ignore_index=True)
            
        return df
        
        
 
        
        
        
        
    

In [111]:
class Admission(Patient):
    def __init__(self, admission_data):
        
        self._admission_id = admission_data['admission_id'][0]
        self._admission_date = datetime.strptime(admission_data['admission_date'][0],
                                                 '%Y-%m-%d')
        self._admission_hour =  admission_data['admission_time'][0]
        self._patient_n_admission = admission_data['n_admission']
        
        self._admission_source = admission_data['Procedencia'][0]
        self._admission_physician = admission_data['admission_physician'][0]
        self._admission_sector = admission_data['SecAdmisión'][0]
        
        
        self._discharge_date = datetime.strptime(admission_data['discharge_date'][0],'%Y-%m-%d')
        self._discharge_hour = admission_data['discharge_time'][0]
        self._discarge_reason = admission_data['Motivo Alta'][0]
        self._pre_discharge_date = admission_data['PreAlta'][0]
        self._discharge_physician = admission_data['Médico Denuncia Egreso'][0]
        
        self._diagnosis = {'administrative_diagnosis_uncod': admission_data['Diagnóstico Administrativo No Codificado'][0],
                           'presumptive_diagnosis_cod': admission_data['CodDiagPresu'][0], 
                           'presumptive_diagnosis_uncod': admission_data['Diagnóstico Presuntivo'][0], 
                           'discharge_diagnosis_cod': admission_data['CodCieDiagEgr'][0], 
                           'discharge_diagnosis_uncod': admission_data['Diagnóstico Egreso'][0],
                           'high_risk_TEP_diagnosis': admission_data['DiagAltoRiesgoTEP'][0],
                           'low_risk_TEP_diagnosis': admission_data['DiagBajoRiesgoTEP'][0]}
        
        self._surgery = admission_data['Quirurg'][0] == 'Sí'
        self._hips_surgery = admission_data['CaderaExpress'][0] == 'Sí'
        self._is_1st_admission = admission_data['Reingreso'][0] == 'No'
        
        self._admission_data = admission_data
        
        
        
    

In [112]:
paciente = Patient('3526141-4')

In [113]:
paciente.load_patient_data('hospitalizations',internaciones)

In [114]:
paciente.hospitalizations_data

Unnamed: 0,Nro Adm,Nro H.C.,Edad,Sexo,Fec Nac,Entidad,AgrupEntidad,Fec Adm,Hora Adm,AñoAdm,MesAdm,Procedencia,Médico Admisión,SecAdmisión,SecUltimo,CategUlt,Aislación,HabitUlt,CamaUlt,Fec Alta,Hora Alta,AñoAlta,MesAlta,Motivo Alta,PreAlta,1erSecInt,Diagnóstico Administrativo No Codificado,CodDiagPresu,Diagnóstico Presuntivo,CodCieDiagEgr,Diagnóstico Egreso,FecDenunEgre,HoraDenunEgre,Médico Denuncia Egreso,Epicrisis,MédicoEpicrisis,Quirurg,CaderaExpress,Permanencia,Servicio Responsable,Servicio Co-Responsable,FecHorIngGua,Prest.Guardia,Prestac.Guardia,Reingreso,AdmAntReciente,FecAdmAntReciente,FecAltaAntReciente,SecAntReciente,Diagn. Egreso Admisión Anterior Reciente,AmbulanciaEgreso,PesoAlNacer,EdadGestac,PIM2TEP,DiagAltoRiesgoTEP,DiagBajoRiesgoTEP,ARM_TEP,CEC_TEP,SolicDerivación,OrigDerivación,Procedencia.1,DiagnósticoDerivación,AreaDerivación,Notificado,UsuarioDeriv
0,466470-1,3526141-4,79,F,1937-11-15,PAMI UGL VI,PAMI,2017-01-01,02:42:00,2017,Ene,Emergencias,VEDIA VARGAS ANICETA,EME,T16,C,,1634,2,2017-01-04,18:17:00,2017.0,Ene,Alta Medica,,T16,DESHIDRATACION,XX2,No Especifica Codigo,276.51,Deshidratacion,2017-01-04,16:47:41,SIMONE MILENA,Sí,SIMONE MILENA,No,No,3,CLINICA MEDICA,,,,,No,,,,,,Sí,,,,,,,,,,,,,,
33624,500538-7,3526141-4,100,F,1917-11-15,PAMI UGL VI,PAMI,2018-02-26,15:24:00,2018,Feb,Emergencias,ROJAS MARIANA GISELLE,EME,T11,C,,1104,2,2018-03-02,17:16:00,2018.0,Mar,Alta Medica,,T11,NEUMOPATIA,786.00,"Alteracion Respiratoria, Sin Especificar",428.0,Insuficiencia Cardiaca,2018-03-02,14:23:38,ROCCA LUCIANA,Sí,ROCCA LUCIANA,No,No,4,CLINICA MEDICA,,,,,No,,,,,,No,,,,,,,,43157.6161,DRA AGRA,DOMICILIO,NEUMOPATIA,INTERNACION GENERAL,DR HEMINSEN,JFOSTER


In [115]:
paciente.get_demographics()

{'birth_date': datetime.datetime(1937, 11, 15, 0, 0),
 'gender': 'F',
 'insurance': 'PAMI UGL VI'}

In [116]:
paciente.demographics

{'birth_date': datetime.datetime(1937, 11, 15, 0, 0),
 'gender': 'F',
 'insurance': 'PAMI UGL VI'}

In [117]:
paciente.get_admissions()

In [118]:
paciente.admissions

{'466470-1': <__main__.Admission at 0x1182fd668>,
 '500538-7': <__main__.Admission at 0x117ff4ba8>}

In [119]:
paciente.admissions['466470-1']._admission_date

datetime.datetime(2017, 1, 1, 0, 0)

In [120]:
paciente.admissions['500538-7']._admission_date

datetime.datetime(2018, 2, 26, 0, 0)

In [121]:
paciente.admissions['466470-1']._admission_date

datetime.datetime(2017, 1, 1, 0, 0)

In [122]:
paciente.admissions['466470-1']._admission_date + timedelta(1)

datetime.datetime(2017, 1, 2, 0, 0)

In [123]:
paciente.admission_history('466470-1')

Unnamed: 0,patient_id,admission_id,no._of_admission,hosp_day_number,date,discharge
0,3526141-4,466470-1,0,0,2017-01-01,False
1,3526141-4,466470-1,0,1,2017-01-02,False
2,3526141-4,466470-1,0,2,2017-01-03,True


In [127]:
paciente.admission_history('500538-7')

Unnamed: 0,patient_id,admission_id,no._of_admission,hosp_day_number,date,discharge
0,3526141-4,500538-7,1,0,2018-02-26,False
1,3526141-4,500538-7,1,1,2018-02-27,False
2,3526141-4,500538-7,1,2,2018-02-28,False
3,3526141-4,500538-7,1,3,2018-03-01,True


In [125]:
paciente.load_patient_data('laboratory',db['labos'])

In [133]:
paciente.laboratory_data.groupby('Fecha').count()

Unnamed: 0_level_0,Nro Vale,Pun,Estado,Hora,HH,AñoMes,Sector,Nro Adm,HistClín,Nombre Paciente,Fec.Adm.,Fec.Alta,Entidad,Nombre Entidad,Nro.Afiliado,GrupoEnt,Urgencia,Mnemo Serv,Servicio,Cod Serv,Nombre del Solicitante,Función del Solicitante,Prestación,Descrip Prestación,CantPrest,Cod Insumo,Descrip Insumo,CantInsumos,EstadoResultado,Observ.Estudio
Fecha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
01/01/2017,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,0,0,0,0,0
02/01/2017,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,0,0,0,0,0
03/01/2017,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0,0,0,0
26/02/2018,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,0,0,0,0,0
27/02/2018,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0


In [95]:
(paciente.admissions['466470-1']._discharge_date-paciente.admissions['466470-1']._admission_date).days

3

In [31]:
for admission_id in paciente.admissions:
    print(paciente.admissions[admission_id]._admission_hour)

02:42:00
15:24:00
