# DATA TRANSFORMATION - 8_hospitalizaciones
## 0. Previos

Se cargan las bases y las librerías a utilizar.

In [1]:
# Librerías
import datetime
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import generic_funcions as gf

# Folder that holds the project's data files. It is handed to the `gf` loaders below.
# Set the DS4A_DATA_DIR environment variable to run the notebook on another machine;
# when it is unset (or empty) the original local path is used, so existing runs are unchanged.
ruta_archivos = os.environ.get('DS4A_DATA_DIR') or 'C:/Users/monic/documentos/ds4a/project/datos/'
# The default ends with a slash; keep that form for overrides too.
# NOTE(review): presumably the helpers append file names to this string - confirm in generic_funcions.
if not ruta_archivos.endswith('/'):
    ruta_archivos += '/'

## 1. Load

In [2]:
# Key dictionary provided by the shared helper module. In this notebook it is passed
# to the data loader and indexed by module name to obtain the column prefix
# (dcc[modulo]['prefi']).
dcc = gf.diccionario_llaves()

In [3]:
# Module processed by this notebook; used as the `modulo` argument of the loader
# and as the key into `dcc`.
modulo = 'Hospitalizaciones'

In [4]:
# Load the table for the selected module through the shared loader and preview it.
base = gf.carga_datos(
    ruta=ruta_archivos,
    diccionario=dcc,
    modulo=modulo,
)
base.head()

Unnamed: 0,id,Sexo,EDAD (Años),Id Diagnostico Egreso,Descripción diagnostico (egreso),Dias Uci,Dias Uce,Días de Estancia (Calculada),fecha,Fecha Egreso,year,month,year_month
0,1381384,Femenino,43,R060,DISNEA,0,0,0,2020-05-22,2020-05-22,2020,5,202005
1,1118363,Masculino,6,R060,DISNEA,0,0,0,2020-01-02,2020-01-02,2020,1,202001
2,1118363,Masculino,6,R060,DISNEA,0,0,0,2020-01-02,2020-01-02,2020,1,202001
3,978163,Masculino,12,E848,FIBROSIS QUÍSTICA CON OTRAS MANIFESTACIONES,0,0,0,2019-05-23,2019-05-23,2019,5,201905
4,1140753,Masculino,31,J459,"ASMA, NO ESPECIFICADA",0,0,0,2018-08-22,2018-08-22,2018,8,201808


## 2. Transform

In [5]:
# Derive a diagnosis-letter column from the discharge diagnosis code.
# Judging by the preview below, the helper adds 'Id Diagnostico Egreso_cod'
# holding the lower-cased leading letter of the code (e.g. R060 -> r, J459 -> j).
base = gf.letra_codigo( base, 'Id Diagnostico Egreso' )
base.head()

Unnamed: 0,id,Sexo,EDAD (Años),Id Diagnostico Egreso,Descripción diagnostico (egreso),Dias Uci,Dias Uce,Días de Estancia (Calculada),fecha,Fecha Egreso,year,month,year_month,Id Diagnostico Egreso_cod
0,1381384,Femenino,43,R060,DISNEA,0,0,0,2020-05-22,2020-05-22,2020,5,202005,r
1,1118363,Masculino,6,R060,DISNEA,0,0,0,2020-01-02,2020-01-02,2020,1,202001,r
2,1118363,Masculino,6,R060,DISNEA,0,0,0,2020-01-02,2020-01-02,2020,1,202001,r
3,978163,Masculino,12,E848,FIBROSIS QUÍSTICA CON OTRAS MANIFESTACIONES,0,0,0,2019-05-23,2019-05-23,2019,5,201905,e
4,1140753,Masculino,31,J459,"ASMA, NO ESPECIFICADA",0,0,0,2018-08-22,2018-08-22,2018,8,201808,j


* Number of hospitalizations and number of hospitalizations related to asthma
* Number of days in UCI and number of days in UCI related to asthma

In [6]:
# One row per patient / year / month / diagnosis letter, with the three
# length-of-stay columns added up inside each group and a constant counter `num`.
# NOTE(review): `num` is 1 per group, so it counts distinct diagnosis letters in the
# month rather than individual admissions - confirm this is the intended count.
base_ = (
    base
    .groupby(['id', 'year', 'month', 'Id Diagnostico Egreso_cod'])[
        ['Dias Uci', 'Dias Uce', 'Días de Estancia (Calculada)']
    ]
    .sum()
    .reset_index()
    .assign(num=1)
)
base_.head()

Unnamed: 0,id,year,month,Id Diagnostico Egreso_cod,Dias Uci,Dias Uce,Días de Estancia (Calculada),num
0,500547,2015,10,n,0,0,10,1
1,502989,2014,11,m,0,0,13,1
2,502989,2015,4,j,0,0,5,1
3,502989,2016,8,s,0,0,7,1
4,506808,2010,7,j,0,0,3,1


In [7]:
# Two-level diagnosis flag: keep 'j' as its own category and fold every
# other diagnosis letter into 'otra'.
base_['diag'] = np.where(base_['Id Diagnostico Egreso_cod'] == 'j', 'j', 'otra')

In [8]:
# Short names for the stay-length columns; they become part of the pivoted column names.
base_ = base_.rename(
    columns={
        'Dias Uci': 'uci',
        'Dias Uce': 'uce',
        'Días de Estancia (Calculada)': 'est',
    }
)

In [9]:
# Preview the grouped table after adding `diag` and shortening the column names.
base_.head()

Unnamed: 0,id,year,month,Id Diagnostico Egreso_cod,uci,uce,est,num,diag
0,500547,2015,10,n,0,0,10,1,otra
1,502989,2014,11,m,0,0,13,1,otra
2,502989,2015,4,j,0,0,5,1,j
3,502989,2016,8,s,0,0,7,1,otra
4,506808,2010,7,j,0,0,3,1,j


In [10]:
# Spread the 'j' / 'otra' categories into separate columns: one row per
# patient / year / month, one column per (measure, category) pair.
# The aggregation is given as the string 'sum': passing np.sum is deprecated in
# recent pandas releases (FutureWarning) and is slated to change behaviour.
base_p = base_.pivot_table(
    index=['id', 'year', 'month'],
    columns='diag',
    values=['uci', 'uce', 'est', 'num'],
    aggfunc='sum',
).reset_index()
# Flatten the two-level header: ('est', 'j') -> 'est_j'. The key columns have an
# empty second level and therefore come out as 'id_', 'year_', 'month_'.
base_p.columns = ['_'.join(col).strip() for col in base_p.columns.values]
# A missing category for a patient-month means nothing was recorded: use 0, not NaN.
base_p = base_p.fillna(0)
base_p.head()

Unnamed: 0,id_,year_,month_,est_j,est_otra,num_j,num_otra,uce_j,uce_otra,uci_j,uci_otra
0,500547,2015,10,0.0,10.0,0.0,1.0,0.0,0.0,0.0,0.0
1,502989,2014,11,0.0,13.0,0.0,1.0,0.0,0.0,0.0,0.0
2,502989,2015,4,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,502989,2016,8,0.0,7.0,0.0,1.0,0.0,0.0,0.0,0.0
4,506808,2010,7,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# Overall totals per patient-month: the 'otra' part plus the 'j' part of each measure.
# New columns are appended in the order num, uci, uce, est.
base_p = base_p.assign(**{
    medida: base_p[medida + '_otra'] + base_p[medida + '_j']
    for medida in ('num', 'uci', 'uce', 'est')
})

base_p.head()

Unnamed: 0,id_,year_,month_,est_j,est_otra,num_j,num_otra,uce_j,uce_otra,uci_j,uci_otra,num,uci,uce,est
0,500547,2015,10,0.0,10.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,10.0
1,502989,2014,11,0.0,13.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,13.0
2,502989,2015,4,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,5.0
3,502989,2016,8,0.0,7.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,7.0
4,506808,2010,7,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0


In [12]:
# Keep the keys plus, for every measure, the overall total and its 'j' share;
# the '_otra' columns are dropped.
base_p = base_p.loc[:, [
    'id_', 'year_', 'month_',
    'num', 'num_j',
    'est', 'est_j',
    'uci', 'uci_j',
    'uce', 'uce_j',
]]

In [13]:
# Prepend the module prefix from the key dictionary to every column name
# (the key columns are prefixed here as well).
pre = dcc[modulo]['prefi'] + '_'
base_p = base_p.add_prefix(pre)

In [14]:
# Give the three key columns their plain names back (prefix and trailing underscore removed).
base_p = base_p.rename(
    columns={pre + clave + '_': clave for clave in ('id', 'year', 'month')}
)
base_p.head()

Unnamed: 0,id,year,month,hos_num,hos_num_j,hos_est,hos_est_j,hos_uci,hos_uci_j,hos_uce,hos_uce_j
0,500547,2015,10,1.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0
1,502989,2014,11,1.0,0.0,13.0,0.0,0.0,0.0,0.0,0.0
2,502989,2015,4,1.0,1.0,5.0,5.0,0.0,0.0,0.0,0.0
3,502989,2016,8,1.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0
4,506808,2010,7,1.0,1.0,3.0,3.0,0.0,0.0,0.0,0.0


In [15]:
# Size check: number of patient-month rows and columns in the feature table.
base_p.shape

(1265, 11)

In [16]:
# Summary statistics of the feature table, to eyeball ranges and outliers.
base_p.describe()

Unnamed: 0,id,year,month,hos_num,hos_num_j,hos_est,hos_est_j,hos_uci,hos_uci_j,hos_uce,hos_uce_j
count,1265.0,1265.0,1265.0,1265.0,1265.0,1265.0,1265.0,1265.0,1265.0,1265.0,1265.0
mean,1189168.0,2014.675099,6.532016,1.040316,0.607115,8.321739,5.025296,0.337549,0.287747,0.381028,0.250593
std,434955.1,3.37004,3.365517,0.200758,0.488585,11.79164,9.690429,2.830506,2.788888,1.606047,1.201779
min,500547.0,2008.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,813655.0,2012.0,4.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
50%,1194113.0,2015.0,7.0,1.0,1.0,5.0,3.0,0.0,0.0,0.0,0.0
75%,1536265.0,2018.0,9.0,1.0,1.0,9.0,6.0,0.0,0.0,0.0,0.0
max,1999536.0,2020.0,12.0,3.0,1.0,147.0,147.0,80.0,80.0,24.0,18.0


## 3. Merge with ids table

In [17]:
# Attach the hospitalization features to the monthly ids table from the helper module.
ids_mensual = gf.base_ids_mensual( ruta_archivos )
# Left join so that every row of the ids table is kept. The keys are spelled out
# instead of relying on "all shared columns", and `validate` makes pandas raise if
# the feature table ever holds more than one row per key.
# NOTE(review): rows without a match keep NaN in the hos_* columns - confirm
# whether they should become 0 downstream.
base_final_hos = ids_mensual.merge(
    base_p,
    how='left',
    on=['id', 'year', 'month'],
    validate='many_to_one',
)
base_final_hos.head()

Unnamed: 0,id,year,month,hos_num,hos_num_j,hos_est,hos_est_j,hos_uci,hos_uci_j,hos_uce,hos_uce_j
0,500547,2016,1,,,,,,,,
1,500547,2016,2,,,,,,,,
2,500547,2016,3,,,,,,,,
3,500547,2016,4,,,,,,,,
4,500547,2016,5,,,,,,,,


In [18]:
# Size check of the merged table (rows, columns).
base_final_hos.shape

(53280, 11)