In [1]:
from io import BytesIO
from zipfile import ZipFile
import pandas as pd
import numpy as np
import os
import datetime

In [17]:
path = "/home/nacho/Documents/coronavirus/COVID-19_Paper/"
os.chdir(os.path.join(path)) 

In [18]:
df = pd.read_csv("datos_abiertos_covid19.zip")

In [4]:
list(df)

['FECHA_ACTUALIZACION',
 'ID_REGISTRO',
 'ORIGEN',
 'SECTOR',
 'ENTIDAD_UM',
 'SEXO',
 'ENTIDAD_NAC',
 'ENTIDAD_RES',
 'MUNICIPIO_RES',
 'TIPO_PACIENTE',
 'FECHA_INGRESO',
 'FECHA_SINTOMAS',
 'FECHA_DEF',
 'INTUBADO',
 'NEUMONIA',
 'EDAD',
 'NACIONALIDAD',
 'EMBARAZO',
 'HABLA_LENGUA_INDIG',
 'INDIGENA',
 'DIABETES',
 'EPOC',
 'ASMA',
 'INMUSUPR',
 'HIPERTENSION',
 'OTRA_COM',
 'CARDIOVASCULAR',
 'OBESIDAD',
 'RENAL_CRONICA',
 'TABAQUISMO',
 'OTRO_CASO',
 'TOMA_MUESTRA_LAB',
 'RESULTADO_LAB',
 'TOMA_MUESTRA_ANTIGENO',
 'RESULTADO_ANTIGENO',
 'CLASIFICACION_FINAL',
 'MIGRANTE',
 'PAIS_NACIONALIDAD',
 'PAIS_ORIGEN',
 'UCI']

In [19]:
def filter_exclude_columns(df):
    df.drop(['FECHA_ACTUALIZACION', 'ID_REGISTRO', 'ORIGEN', 'MIGRANTE', 'PAIS_ORIGEN', 'PAIS_NACIONALIDAD','MUNICIPIO_RES','ENTIDAD_NAC', 'NACIONALIDAD','HABLA_LENGUA_INDIG', 'INDIGENA', 'TOMA_MUESTRA_LAB', 'RESULTADO_LAB', 'TOMA_MUESTRA_ANTIGENO', 'RESULTADO_ANTIGENO'], axis=1, inplace = True) #Se eliminan las columnas innecesarias
    return df


def date_preprocessing(df):
    #convierte a tipo fecha fecha_sintoma 
    df['FECHA_SINTOMAS'] = pd.to_datetime(df['FECHA_SINTOMAS'])
    #restar columna FECHA_INGRESO menos FECHA_SINTOMAS y guardar en columna dias_dif
    df['FECHA_INGRESO'] = pd.to_datetime(df['FECHA_INGRESO'])
    df['DIAS_DIF_HOSP'] = (df['FECHA_INGRESO'] - df['FECHA_SINTOMAS'])
    df.DIAS_DIF_HOSP = df.DIAS_DIF_HOSP.dt.days
    #eliminar todos los dias negativos
    df.drop(df[df['DIAS_DIF_HOSP'] < 0].index, inplace = True)
    #verificacion
    df['DIAS_DIF_HOSP'][df['DIAS_DIF_HOSP'] < 0]
    #df['DIAS_DIF_HOSP'].astype(int)
    return df

def filter_negative_dates(df):
    #hace una copia ed fecha_def a dias_dif_def
    df['BOOL_DEF'] = df["FECHA_DEF"].copy()
    #CREAR COLUMNA DE NUMERO DE DIAS DE SINTOMAS A FALLECIMIENTO
    #crea columna dias desde sintomas a fallecido
    #remplazar en fecha_def 9999 con nan
    df["FECHA_DEF"] = df['FECHA_DEF'].replace(['9999-99-99'], np.nan)
    #convertir fecha_def a tipo de dato fecha
    df['FECHA_DEF'] = pd.to_datetime(df['FECHA_DEF'])
    #restar defcha def menos fecha_sintomas
    df['DIAS_DIF_DEF'] = (df['FECHA_DEF'] - df['FECHA_SINTOMAS'])
    df.DIAS_DIF_DEF = df.DIAS_DIF_DEF.dt.days
    df['DIAS_DIF_DEF'] = df['DIAS_DIF_DEF'].replace([np.nan], 0)
    df['DIAS_DIF_DEF'] = df['DIAS_DIF_DEF'].astype(int)
    #validar si hay dias negativos y eliminarlos
    df.drop(df[df['DIAS_DIF_DEF'] < 0].index, inplace = True)
    #verificacion
    df['DIAS_DIF_DEF'][df['DIAS_DIF_DEF'] < 0]
    return df

def filter_binary_status(df):
    #columna defuncion cambia a 0 a los no fallecidos
    df['BOOL_DEF'] = df['BOOL_DEF'].replace(['9999-99-99'], 0)
    #columna defuncion cambia a 1 los fallecidos
    df.loc[df["BOOL_DEF"] != 0, ["BOOL_DEF"]] = 1
    return df
    
def confirmed_covid(df):
    df['RESULTADO'] = df['CLASIFICACION_FINAL'].copy()
    df.drop(df['CLASIFICACION_FINAL'])
    dictionary = ['RESULTADO']
    for condition in dictionary:
        df.loc[df[condition] != 3, [condition]] = 0
    df.loc[df[condition] == 3, [condition]] = 1
    return df

def preprocessing(df_prediction):
    df_prediction = df_prediction[df_prediction['RESULTADO'] == 1] #filtrar solo gente positiva covid
    df_prediction.loc[df_prediction['SEXO'] == 2, ['SEXO']] = 0 #Hombre es 0, Mujer es 1
    df_prediction.loc[df_prediction['EMBARAZO'] == 97, ['EMBARAZO']] = 2
    df_prediction.loc[df_prediction['EMBARAZO'] == 98, ['EMBARAZO']] = 2
    df_prediction.loc[df_prediction['EMBARAZO'] == 99, ['EMBARAZO']] = 2
    #eliminar los hombres embarazados***
    df_prediction.drop(df_prediction[(df_prediction['SEXO'] ==0) & (df_prediction['EMBARAZO'] ==1)].index, inplace = True)
    #filtra tipo paciente en 0:No hosp, 1:Si hosp
    df_prediction.loc[df_prediction['TIPO_PACIENTE'] == 1, ['TIPO_PACIENTE']] = 0
    df_prediction.loc[df_prediction['TIPO_PACIENTE'] == 2, ['TIPO_PACIENTE']] = 1
    return df_prediction

In [20]:
def print_values(conditions, df):
    for i in df[df.columns]:
        #x = df[i].value_counts()
        x = (df[i].value_counts()/df[i].count())*100
        print(i,"\n",x)
        print()

In [21]:
#preprocessing
df = filter_exclude_columns(df)
df = date_preprocessing(df)
df = filter_negative_dates(df)
df = filter_binary_status(df)
df = confirmed_covid(df)
df = preprocessing(df)

In [9]:
list(df)

['SECTOR',
 'ENTIDAD_UM',
 'SEXO',
 'ENTIDAD_RES',
 'TIPO_PACIENTE',
 'FECHA_INGRESO',
 'FECHA_SINTOMAS',
 'FECHA_DEF',
 'INTUBADO',
 'NEUMONIA',
 'EDAD',
 'EMBARAZO',
 'DIABETES',
 'EPOC',
 'ASMA',
 'INMUSUPR',
 'HIPERTENSION',
 'OTRA_COM',
 'CARDIOVASCULAR',
 'OBESIDAD',
 'RENAL_CRONICA',
 'TABAQUISMO',
 'OTRO_CASO',
 'CLASIFICACION_FINAL',
 'UCI',
 'DIAS_DIF_HOSP',
 'BOOL_DEF',
 'DIAS_DIF_DEF',
 'RESULTADO']

In [10]:
print("Numero de casos positivos de COVID: ", len(df))
print("Numero de hospitalizados por COVID: ", df.TIPO_PACIENTE.value_counts()[1])
print("Numero de intubados por COVID: ", df.INTUBADO.value_counts()[1])
print("Numero de fallecidos por COVID: ", df.BOOL_DEF.value_counts()[1])
print("Numero de UCI por COVID: ", df.UCI.value_counts()[1])

Numero de casos positivos de COVID:  2221087
Numero de hospitalizados por COVID:  419927
Numero de intubados por COVID:  54808
Numero de fallecidos por COVID:  204615
Numero de UCI por COVID:  33418


In [12]:
#CASO 1: si el paciente contagiado de CoV-2 necesitará hospitalización 

In [22]:
df_caso1 = df.copy()
df_caso1.drop(df_caso1[(df_caso1['TIPO_PACIENTE'] == 99)].index, inplace = True)

conditions = ['EMBARAZO','RENAL_CRONICA', 'DIABETES', 'INMUSUPR', 'EPOC', 'OBESIDAD', 'HIPERTENSION', 'TABAQUISMO', 'CARDIOVASCULAR', 'ASMA']
for condition in conditions:
    df_caso1 = df_caso1.loc[~((df_caso1[condition] == 97) | (df_caso1[condition] == 98) | (df_caso1[condition] == 99))]
    df_caso1.loc[df_caso1[condition] == 2, [condition]] = 0 #0 es NO, 1 es SI

final_caso1_columns = ['EDAD','EMBARAZO','RENAL_CRONICA','DIABETES','INMUSUPR','EPOC','OBESIDAD','HIPERTENSION','TABAQUISMO','CARDIOVASCULAR','ASMA','SEXO','TIPO_PACIENTE']
df_caso1 = df_caso1[df_caso1.columns.intersection(final_caso1_columns)]

#print_values(final_caso1_columns, df_caso1)
print_values(['TIPO_PACIENTE'], df_caso1)
print(df_caso1.shape)

SEXO 
 0    50.126135
1    49.873865
Name: SEXO, dtype: float64

TIPO_PACIENTE 
 0    81.134497
1    18.865503
Name: TIPO_PACIENTE, dtype: float64

EDAD 
 30     2.268795
31     2.222789
29     2.210948
28     2.198068
32     2.168195
         ...   
117    0.000181
114    0.000181
113    0.000136
119    0.000045
111    0.000045
Name: EDAD, Length: 122, dtype: float64

EMBARAZO 
 0    99.367971
1     0.632029
Name: EMBARAZO, dtype: float64

DIABETES 
 0    86.604833
1    13.395167
Name: DIABETES, dtype: float64

EPOC 
 0    98.882731
1     1.117269
Name: EPOC, dtype: float64

ASMA 
 0    97.795605
1     2.204395
Name: ASMA, dtype: float64

INMUSUPR 
 0    99.17364
1     0.82636
Name: INMUSUPR, dtype: float64

HIPERTENSION 
 0    82.584254
1    17.415746
Name: HIPERTENSION, dtype: float64

CARDIOVASCULAR 
 0    98.44535
1     1.55465
Name: CARDIOVASCULAR, dtype: float64

OBESIDAD 
 0    85.370468
1    14.629532
Name: OBESIDAD, dtype: float64

RENAL_CRONICA 
 0    98.497819
1     1.50218

In [None]:
#CASO 1.2: si el paciente contagiado de CoV-2 necesitará hospitalización crítica

In [23]:
df_caso1_2 = df.copy()
df_caso1_2.drop(df_caso1[(df_caso1['TIPO_PACIENTE'] == 99)].index, inplace = True)
#elimina TIPO_PACIENTE = 0 (gente NO hospitalizada)
df_caso1_2.drop(df_caso1_2[(df_caso1_2['TIPO_PACIENTE'] == 0)].index, inplace = True)

conditions = ['EMBARAZO','RENAL_CRONICA', 'DIABETES', 'INMUSUPR', 'EPOC', 'OBESIDAD', 'HIPERTENSION', 'TABAQUISMO', 'CARDIOVASCULAR', 'ASMA','UCI','INTUBADO']
for condition in conditions:
    df_caso1_2 = df_caso1_2.loc[~((df_caso1_2[condition] == 97) | (df_caso1_2[condition] == 98) | (df_caso1_2[condition] == 99))]
    df_caso1_2.loc[df_caso1_2[condition] == 2, [condition]] = 0 #0 es NO, 1 es SI

def conditions(df_caso1_2):
    if (df_caso1_2['UCI'] == 1) or (df_caso1_2['INTUBADO'] == 1):
        return 1
    else:
        return 0
df_caso1_2['hosp_critica'] = df_caso1_2.apply(conditions, axis=1)

final_caso1_2_columns = ['EDAD','EMBARAZO','RENAL_CRONICA','DIABETES','INMUSUPR','EPOC','OBESIDAD','HIPERTENSION','TABAQUISMO','CARDIOVASCULAR','ASMA','SEXO','TIPO_PACIENTE','hosp_critica']
df_caso1_2 = df_caso1_2[df_caso1_2.columns.intersection(final_caso1_2_columns)]

#print_values(final_caso1_columns, df_caso1)
print_values(['TIPO_PACIENTE'], df_caso1_2)
print_values(['hosp_critica'], df_caso1_2)
print(df_caso1_2.shape)

SEXO 
 0    59.377464
1    40.622536
Name: SEXO, dtype: float64

TIPO_PACIENTE 
 1    100.0
Name: TIPO_PACIENTE, dtype: float64

EDAD 
 60     2.518113
61     2.487010
63     2.480018
65     2.466516
59     2.466034
         ...   
115    0.000241
110    0.000241
109    0.000241
106    0.000241
118    0.000241
Name: EDAD, Length: 114, dtype: float64

EMBARAZO 
 0    99.360827
1     0.639173
Name: EMBARAZO, dtype: float64

DIABETES 
 0    67.490446
1    32.509554
Name: DIABETES, dtype: float64

EPOC 
 0    96.521561
1     3.478439
Name: EPOC, dtype: float64

ASMA 
 0    97.994238
1     2.005762
Name: ASMA, dtype: float64

INMUSUPR 
 0    97.947463
1     2.052537
Name: INMUSUPR, dtype: float64

HIPERTENSION 
 0    61.553447
1    38.446553
Name: HIPERTENSION, dtype: float64

CARDIOVASCULAR 
 0    95.841159
1     4.158841
Name: CARDIOVASCULAR, dtype: float64

OBESIDAD 
 0    78.660173
1    21.339827
Name: OBESIDAD, dtype: float64

RENAL_CRONICA 
 0    94.785114
1     5.214886
Name: RENAL_C

In [35]:
#CASO 2: predecir en base a los descriptores la mortalidad antes del hospital (sin filtro)

In [24]:
df_caso2 = df.copy()
df_caso2.drop(df_caso2[(df_caso2['TIPO_PACIENTE'] == 99)].index, inplace = True)
#elimina TIPO_PACIENTE = 1 (gente hospitalizada)
df_caso2.drop(df_caso2[(df_caso2['TIPO_PACIENTE'] == 1)].index, inplace = True) #revisar si mejora el rendimiento

conditions = ['EMBARAZO','RENAL_CRONICA', 'DIABETES', 'INMUSUPR', 'EPOC', 'OBESIDAD', 'HIPERTENSION', 'TABAQUISMO', 'CARDIOVASCULAR', 'ASMA']
for condition in conditions:
    df_caso2 = df_caso2.loc[~((df_caso2[condition] == 97) | (df_caso2[condition] == 98) | (df_caso2[condition] == 99))]
    df_caso2.loc[df_caso2[condition] == 2, [condition]] = 0 #0 es NO, 1 es SI

final_caso2_columns = ['EDAD','EMBARAZO','RENAL_CRONICA','DIABETES','INMUSUPR','EPOC','OBESIDAD','HIPERTENSION','TABAQUISMO','CARDIOVASCULAR','ASMA','SEXO','BOOL_DEF','TIPO_PACIENTE']
df_caso2 = df_caso2[df_caso2.columns.intersection(final_caso2_columns)]

#print_values(final_caso2_columns, df_caso2)
print_values(['TIPO_PACIENTE','BOOL_DEF'], df_caso2)
print(df_caso2.shape)

SEXO 
 1    52.025817
0    47.974183
Name: SEXO, dtype: float64

TIPO_PACIENTE 
 0    100.0
Name: TIPO_PACIENTE, dtype: float64

EDAD 
 30     2.651793
29     2.595868
28     2.588126
31     2.587234
27     2.546182
         ...   
114    0.000223
113    0.000167
117    0.000167
111    0.000056
119    0.000056
Name: EDAD, Length: 122, dtype: float64

EMBARAZO 
 0    99.369178
1     0.630822
Name: EMBARAZO, dtype: float64

DIABETES 
 0    91.05068
1     8.94932
Name: DIABETES, dtype: float64

EPOC 
 0    99.432009
1     0.567991
Name: EPOC, dtype: float64

ASMA 
 0    97.751655
1     2.248345
Name: ASMA, dtype: float64

INMUSUPR 
 0    99.458189
1     0.541811
Name: INMUSUPR, dtype: float64

HIPERTENSION 
 0    87.480087
1    12.519913
Name: HIPERTENSION, dtype: float64

CARDIOVASCULAR 
 0    99.051678
1     0.948322
Name: CARDIOVASCULAR, dtype: float64

OBESIDAD 
 0    86.936382
1    13.063618
Name: OBESIDAD, dtype: float64

RENAL_CRONICA 
 0    99.361101
1     0.638899
Name: RENAL_CRO

In [25]:
#CASO 2.1: predecir en base a los descriptores la mortalidad (sin filtro)

In [51]:
df_caso2 = df.copy()
df_caso2.drop(df_caso2[(df_caso2['TIPO_PACIENTE'] == 99)].index, inplace = True)

conditions = ['EMBARAZO','RENAL_CRONICA', 'DIABETES', 'INMUSUPR', 'EPOC', 'OBESIDAD', 'HIPERTENSION', 'TABAQUISMO', 'CARDIOVASCULAR', 'ASMA']
for condition in conditions:
    df_caso2 = df_caso2.loc[~((df_caso2[condition] == 97) | (df_caso2[condition] == 98) | (df_caso2[condition] == 99))]
    df_caso2.loc[df_caso2[condition] == 2, [condition]] = 0 #0 es NO, 1 es SI

final_caso2_columns = ['EDAD','EMBARAZO','RENAL_CRONICA','DIABETES','INMUSUPR','EPOC','OBESIDAD','HIPERTENSION','TABAQUISMO','CARDIOVASCULAR','ASMA','SEXO','BOOL_DEF','TIPO_PACIENTE']
df_caso2 = df_caso2[df_caso2.columns.intersection(final_caso2_columns)]

#print_values(final_caso2_columns, df_caso2)
print_values(['TIPO_PACIENTE','BOOL_DEF'], df_caso2)
print(df_caso2.shape)

SEXO 
 0    50.126135
1    49.873865
Name: SEXO, dtype: float64

TIPO_PACIENTE 
 0    81.134497
1    18.865503
Name: TIPO_PACIENTE, dtype: float64

EDAD 
 30     2.268795
31     2.222789
29     2.210948
28     2.198068
32     2.168195
         ...   
117    0.000181
114    0.000181
113    0.000136
119    0.000045
111    0.000045
Name: EDAD, Length: 122, dtype: float64

EMBARAZO 
 0    99.367971
1     0.632029
Name: EMBARAZO, dtype: float64

DIABETES 
 0    86.604833
1    13.395167
Name: DIABETES, dtype: float64

EPOC 
 0    98.882731
1     1.117269
Name: EPOC, dtype: float64

ASMA 
 0    97.795605
1     2.204395
Name: ASMA, dtype: float64

INMUSUPR 
 0    99.17364
1     0.82636
Name: INMUSUPR, dtype: float64

HIPERTENSION 
 0    82.584254
1    17.415746
Name: HIPERTENSION, dtype: float64

CARDIOVASCULAR 
 0    98.44535
1     1.55465
Name: CARDIOVASCULAR, dtype: float64

OBESIDAD 
 0    85.370468
1    14.629532
Name: OBESIDAD, dtype: float64

RENAL_CRONICA 
 0    98.497819
1     1.50218

In [27]:
#CASO 3: Mortalidad de los contagiagos DESPUES de INTUBADO,UCI (sin filtro)

In [28]:
df_caso3 = df.copy()
df_caso3.drop(df_caso3[(df_caso3['TIPO_PACIENTE'] == 99)].index, inplace = True)

conditions = ['EMBARAZO','RENAL_CRONICA', 'DIABETES', 'INMUSUPR', 'EPOC', 'OBESIDAD', 'HIPERTENSION', 'TABAQUISMO', 'CARDIOVASCULAR', 'ASMA','INTUBADO','UCI']
for condition in conditions:
    df_caso3 = df_caso3.loc[~((df_caso3[condition] == 97) | (df_caso3[condition] == 98) | (df_caso3[condition] == 99))]
    df_caso3.loc[df_caso3[condition] == 2, [condition]] = 0 #0 es NO, 1 es SI

final_caso3_columns = ['EDAD','EMBARAZO','RENAL_CRONICA','DIABETES','INMUSUPR','EPOC','OBESIDAD','HIPERTENSION','TABAQUISMO','CARDIOVASCULAR','ASMA','SEXO','INTUBADO','UCI','BOOL_DEF','TIPO_PACIENTE']
df_caso3 = df_caso3[df_caso3.columns.intersection(final_caso3_columns)]

#print_values(final_caso3_columns, df_caso3)
print_values(['INTUBADO','UCI','TIPO_PACIENTE','BOOL_DEF'], df_caso3)
print(df_caso3.shape)

SEXO 
 0    59.377464
1    40.622536
Name: SEXO, dtype: float64

TIPO_PACIENTE 
 1    100.0
Name: TIPO_PACIENTE, dtype: float64

INTUBADO 
 0    86.920953
1    13.079047
Name: INTUBADO, dtype: float64

EDAD 
 60     2.518113
61     2.487010
63     2.480018
65     2.466516
59     2.466034
         ...   
115    0.000241
110    0.000241
109    0.000241
106    0.000241
118    0.000241
Name: EDAD, Length: 114, dtype: float64

EMBARAZO 
 0    99.360827
1     0.639173
Name: EMBARAZO, dtype: float64

DIABETES 
 0    67.490446
1    32.509554
Name: DIABETES, dtype: float64

EPOC 
 0    96.521561
1     3.478439
Name: EPOC, dtype: float64

ASMA 
 0    97.994238
1     2.005762
Name: ASMA, dtype: float64

INMUSUPR 
 0    97.947463
1     2.052537
Name: INMUSUPR, dtype: float64

HIPERTENSION 
 0    61.553447
1    38.446553
Name: HIPERTENSION, dtype: float64

CARDIOVASCULAR 
 0    95.841159
1     4.158841
Name: CARDIOVASCULAR, dtype: float64

OBESIDAD 
 0    78.660173
1    21.339827
Name: OBESIDAD, dt

In [29]:
#CASO 3.1: Mortalidad de los contagiagos DESPUES de INTUBADO,UCI (con filtro)

In [30]:
df_caso3 = df.copy()
df_caso3.drop(df_caso3[(df_caso3['TIPO_PACIENTE'] == 99)].index, inplace = True)
    
conditions = ['EMBARAZO','RENAL_CRONICA', 'DIABETES', 'INMUSUPR', 'EPOC', 'OBESIDAD', 'HIPERTENSION', 'TABAQUISMO', 'CARDIOVASCULAR', 'ASMA','INTUBADO','UCI']
for condition in conditions:
    df_caso3 = df_caso3.loc[~((df_caso3[condition] == 97) | (df_caso3[condition] == 98) | (df_caso3[condition] == 99))]
    df_caso3.loc[df_caso3[condition] == 2, [condition]] = 0 #0 es NO, 1 es SI

#elimina no intubado y uci
df_caso3.drop(df_caso3[(df_caso3['INTUBADO'] == 0)].index, inplace = True) 
df_caso3.drop(df_caso3[(df_caso3['UCI'] == 0)].index, inplace = True) 

final_caso3_columns = ['EDAD','EMBARAZO','RENAL_CRONICA','DIABETES','INMUSUPR','EPOC','OBESIDAD','HIPERTENSION','TABAQUISMO','CARDIOVASCULAR','ASMA','SEXO','INTUBADO','UCI','BOOL_DEF','TIPO_PACIENTE']
df_caso3 = df_caso3[df_caso3.columns.intersection(final_caso3_columns)]

#print_values(final_caso3_columns, df_caso3)
print_values(['INTUBADO','UCI','TIPO_PACIENTE','BOOL_DEF'], df_caso3)
print(df_caso3.shape)

SEXO 
 0    65.555685
1    34.444315
Name: SEXO, dtype: float64

TIPO_PACIENTE 
 1    100.0
Name: TIPO_PACIENTE, dtype: float64

INTUBADO 
 1    100.0
Name: INTUBADO, dtype: float64

EDAD 
 60     2.889330
61     2.760915
67     2.679197
65     2.632501
62     2.632501
         ...   
7      0.023348
101    0.011674
98     0.011674
100    0.005837
116    0.005837
Name: EDAD, Length: 102, dtype: float64

EMBARAZO 
 0    99.340416
1     0.659584
Name: EMBARAZO, dtype: float64

DIABETES 
 0    66.372869
1    33.627131
Name: DIABETES, dtype: float64

EPOC 
 0    96.958907
1     3.041093
Name: EPOC, dtype: float64

ASMA 
 0    98.091291
1     1.908709
Name: ASMA, dtype: float64

INMUSUPR 
 0    97.48424
1     2.51576
Name: INMUSUPR, dtype: float64

HIPERTENSION 
 0    61.563157
1    38.436843
Name: HIPERTENSION, dtype: float64

CARDIOVASCULAR 
 0    95.114406
1     4.885594
Name: CARDIOVASCULAR, dtype: float64

OBESIDAD 
 0    71.923885
1    28.076115
Name: OBESIDAD, dtype: float64

RENAL_C

In [31]:
#CASO 3.2: Mortalidad de los contagiagos DESPUES de INTUBADO,UCI (filtro solo INTUBADO)

In [32]:
df_caso3 = df.copy()
df_caso3.drop(df_caso3[(df_caso3['TIPO_PACIENTE'] == 99)].index, inplace = True)
    
conditions = ['EMBARAZO','RENAL_CRONICA', 'DIABETES', 'INMUSUPR', 'EPOC', 'OBESIDAD', 'HIPERTENSION', 'TABAQUISMO', 'CARDIOVASCULAR', 'ASMA','INTUBADO','UCI']
for condition in conditions:
    df_caso3 = df_caso3.loc[~((df_caso3[condition] == 97) | (df_caso3[condition] == 98) | (df_caso3[condition] == 99))]
    df_caso3.loc[df_caso3[condition] == 2, [condition]] = 0 #0 es NO, 1 es SI

#elimina no intubado
df_caso3.drop(df_caso3[(df_caso3['INTUBADO'] == 0)].index, inplace = True) 

final_caso3_columns = ['EDAD','EMBARAZO','RENAL_CRONICA','DIABETES','INMUSUPR','EPOC','OBESIDAD','HIPERTENSION','TABAQUISMO','CARDIOVASCULAR','ASMA','SEXO','INTUBADO','UCI','BOOL_DEF','TIPO_PACIENTE']
df_caso3 = df_caso3[df_caso3.columns.intersection(final_caso3_columns)]

#print_values(final_caso3_columns, df_caso3)
print_values(['INTUBADO','UCI','TIPO_PACIENTE','BOOL_DEF'], df_caso3)
print(df_caso3.shape)

SEXO 
 0    63.81853
1    36.18147
Name: SEXO, dtype: float64

TIPO_PACIENTE 
 1    100.0
Name: TIPO_PACIENTE, dtype: float64

INTUBADO 
 1    100.0
Name: INTUBADO, dtype: float64

EDAD 
 63     2.940309
61     2.901596
60     2.859197
65     2.831545
67     2.798363
         ...   
100    0.009217
99     0.007374
101    0.003687
116    0.001843
108    0.001843
Name: EDAD, Length: 104, dtype: float64

EMBARAZO 
 0    99.684769
1     0.315231
Name: EMBARAZO, dtype: float64

DIABETES 
 0    64.699701
1    35.300299
Name: DIABETES, dtype: float64

EPOC 
 0    96.261476
1     3.738524
Name: EPOC, dtype: float64

ASMA 
 0    97.972201
1     2.027799
Name: ASMA, dtype: float64

INMUSUPR 
 0    97.610884
1     2.389116
Name: INMUSUPR, dtype: float64

HIPERTENSION 
 0    57.772002
1    42.227998
Name: HIPERTENSION, dtype: float64

CARDIOVASCULAR 
 0    95.295506
1     4.704494
Name: CARDIOVASCULAR, dtype: float64

OBESIDAD 
 0    73.811894
1    26.188106
Name: OBESIDAD, dtype: float64

RENAL_C

In [33]:
#CASO 3.3: Mortalidad de los contagiagos DESPUES de INTUBADO,UCI (filtro solo UCI)

In [34]:
df_caso3 = df.copy()
df_caso3.drop(df_caso3[(df_caso3['TIPO_PACIENTE'] == 99)].index, inplace = True)
    
conditions = ['EMBARAZO','RENAL_CRONICA', 'DIABETES', 'INMUSUPR', 'EPOC', 'OBESIDAD', 'HIPERTENSION', 'TABAQUISMO', 'CARDIOVASCULAR', 'ASMA','INTUBADO','UCI']
for condition in conditions:
    df_caso3 = df_caso3.loc[~((df_caso3[condition] == 97) | (df_caso3[condition] == 98) | (df_caso3[condition] == 99))]
    df_caso3.loc[df_caso3[condition] == 2, [condition]] = 0 #0 es NO, 1 es SI

#elimina no uci
df_caso3.drop(df_caso3[(df_caso3['UCI'] == 0)].index, inplace = True) 

final_caso3_columns = ['EDAD','EMBARAZO','RENAL_CRONICA','DIABETES','INMUSUPR','EPOC','OBESIDAD','HIPERTENSION','TABAQUISMO','CARDIOVASCULAR','ASMA','SEXO','INTUBADO','UCI','BOOL_DEF','TIPO_PACIENTE']
df_caso3 = df_caso3[df_caso3.columns.intersection(final_caso3_columns)]

#print_values(final_caso3_columns, df_caso3)
print_values(['INTUBADO','UCI','TIPO_PACIENTE','BOOL_DEF'], df_caso3)
print(df_caso3.shape)

SEXO 
 0    62.920461
1    37.079539
Name: SEXO, dtype: float64

TIPO_PACIENTE 
 1    100.0
Name: TIPO_PACIENTE, dtype: float64

INTUBADO 
 1    52.009715
0    47.990285
Name: INTUBADO, dtype: float64

EDAD 
 60     2.859745
61     2.656345
58     2.638130
65     2.495446
55     2.489375
         ...   
98     0.018215
99     0.015179
101    0.015179
103    0.006072
116    0.003036
Name: EDAD, Length: 104, dtype: float64

EMBARAZO 
 0    99.110504
1     0.889496
Name: EMBARAZO, dtype: float64

DIABETES 
 0    66.472374
1    33.527626
Name: DIABETES, dtype: float64

EPOC 
 0    96.760777
1     3.239223
Name: EPOC, dtype: float64

ASMA 
 0    97.996357
1     2.003643
Name: ASMA, dtype: float64

INMUSUPR 
 0    97.641166
1     2.358834
Name: INMUSUPR, dtype: float64

HIPERTENSION 
 0    62.100789
1    37.899211
Name: HIPERTENSION, dtype: float64

CARDIOVASCULAR 
 0    95.270188
1     4.729812
Name: CARDIOVASCULAR, dtype: float64

OBESIDAD 
 0    73.266545
1    26.733455
Name: OBESIDAD, dt

In [35]:
#CASO 4: Necesidad de UCI ANTES de diagnostico de neumonia (sin filtro)

In [36]:
df_caso4 = df.copy()

conditions = ['EMBARAZO','RENAL_CRONICA', 'DIABETES', 'INMUSUPR', 'EPOC', 'OBESIDAD', 'HIPERTENSION', 'TABAQUISMO', 'CARDIOVASCULAR', 'ASMA','UCI']
for condition in conditions:
    df_caso4 = df_caso4.loc[~((df_caso4[condition] == 97) | (df_caso4[condition] == 98) | (df_caso4[condition] == 99))]
    df_caso4.loc[df_caso4[condition] == 2, [condition]] = 0 #0 es NO, 1 es SI

final_caso4_columns = ['EDAD','EMBARAZO','RENAL_CRONICA','DIABETES','INMUSUPR','EPOC','OBESIDAD','HIPERTENSION','TABAQUISMO','CARDIOVASCULAR','ASMA','SEXO','UCI','TIPO_PACIENTE']
df_caso4 = df_caso4[df_caso4.columns.intersection(final_caso4_columns)]

#print_values(final_caso4_columns, df_caso4)
print_values(['TIPO_PACIENTE','UCI'], df_caso4)
print(df_caso4.shape)

SEXO 
 0    59.377464
1    40.622536
Name: SEXO, dtype: float64

TIPO_PACIENTE 
 1    100.0
Name: TIPO_PACIENTE, dtype: float64

EDAD 
 60     2.518113
61     2.487010
63     2.480018
65     2.466516
59     2.466034
         ...   
115    0.000241
110    0.000241
109    0.000241
106    0.000241
118    0.000241
Name: EDAD, Length: 114, dtype: float64

EMBARAZO 
 0    99.360827
1     0.639173
Name: EMBARAZO, dtype: float64

DIABETES 
 0    67.490446
1    32.509554
Name: DIABETES, dtype: float64

EPOC 
 0    96.521561
1     3.478439
Name: EPOC, dtype: float64

ASMA 
 0    97.994238
1     2.005762
Name: ASMA, dtype: float64

INMUSUPR 
 0    97.947463
1     2.052537
Name: INMUSUPR, dtype: float64

HIPERTENSION 
 0    61.553447
1    38.446553
Name: HIPERTENSION, dtype: float64

CARDIOVASCULAR 
 0    95.841159
1     4.158841
Name: CARDIOVASCULAR, dtype: float64

OBESIDAD 
 0    78.660173
1    21.339827
Name: OBESIDAD, dtype: float64

RENAL_CRONICA 
 0    94.785114
1     5.214886
Name: RENAL_C

In [37]:
#CASO 5: Necesidad de ICU DESPUES de diagnostico de neumonia (sin filtro)

In [38]:
df_caso5 = df.copy()

conditions = ['EMBARAZO','RENAL_CRONICA', 'DIABETES', 'INMUSUPR', 'EPOC', 'OBESIDAD', 'HIPERTENSION', 'TABAQUISMO', 'CARDIOVASCULAR', 'ASMA','UCI', 'NEUMONIA']
for condition in conditions:
    df_caso5 = df_caso5.loc[~((df_caso5[condition] == 97) | (df_caso5[condition] == 98) | (df_caso5[condition] == 99))]
    df_caso5.loc[df_caso5[condition] == 2, [condition]] = 0 #0 es NO, 1 es SI

final_caso5_columns = ['EDAD','EMBARAZO','RENAL_CRONICA','DIABETES','INMUSUPR','EPOC','OBESIDAD','HIPERTENSION','TABAQUISMO','CARDIOVASCULAR','ASMA','SEXO','UCI', 'NEUMONIA','TIPO_PACIENTE']
df_caso5 = df_caso5[df_caso5.columns.intersection(final_caso5_columns)]

#print_values(final_caso5_columns, df_caso5)
print_values(['TIPO_PACIENTE', 'NEUMONIA','UCI'], df_caso5)
print(df_caso5.shape)

SEXO 
 0    59.377464
1    40.622536
Name: SEXO, dtype: float64

TIPO_PACIENTE 
 1    100.0
Name: TIPO_PACIENTE, dtype: float64

NEUMONIA 
 1    65.748936
0    34.251064
Name: NEUMONIA, dtype: float64

EDAD 
 60     2.518113
61     2.487010
63     2.480018
65     2.466516
59     2.466034
         ...   
115    0.000241
110    0.000241
109    0.000241
106    0.000241
118    0.000241
Name: EDAD, Length: 114, dtype: float64

EMBARAZO 
 0    99.360827
1     0.639173
Name: EMBARAZO, dtype: float64

DIABETES 
 0    67.490446
1    32.509554
Name: DIABETES, dtype: float64

EPOC 
 0    96.521561
1     3.478439
Name: EPOC, dtype: float64

ASMA 
 0    97.994238
1     2.005762
Name: ASMA, dtype: float64

INMUSUPR 
 0    97.947463
1     2.052537
Name: INMUSUPR, dtype: float64

HIPERTENSION 
 0    61.553447
1    38.446553
Name: HIPERTENSION, dtype: float64

CARDIOVASCULAR 
 0    95.841159
1     4.158841
Name: CARDIOVASCULAR, dtype: float64

OBESIDAD 
 0    78.660173
1    21.339827
Name: OBESIDAD, dt

In [39]:
#CASO 5.1: Necesidad de ICU DESPUES de diagnostico de neumonia (con filtro)

In [40]:
df_caso5 = df.copy()

conditions = ['EMBARAZO','RENAL_CRONICA', 'DIABETES', 'INMUSUPR', 'EPOC', 'OBESIDAD', 'HIPERTENSION', 'TABAQUISMO', 'CARDIOVASCULAR', 'ASMA','UCI', 'NEUMONIA']
for condition in conditions:
    df_caso5 = df_caso5.loc[~((df_caso5[condition] == 97) | (df_caso5[condition] == 98) | (df_caso5[condition] == 99))]
    df_caso5.loc[df_caso5[condition] == 2, [condition]] = 0 #0 es NO, 1 es SI

#elimina no neumonia
df_caso5.drop(df_caso5[(df_caso5['NEUMONIA'] == 0)].index, inplace = True) 

final_caso5_columns = ['EDAD','EMBARAZO','RENAL_CRONICA','DIABETES','INMUSUPR','EPOC','OBESIDAD','HIPERTENSION','TABAQUISMO','CARDIOVASCULAR','ASMA','SEXO','UCI', 'NEUMONIA','TIPO_PACIENTE']
df_caso5 = df_caso5[df_caso5.columns.intersection(final_caso5_columns)]

#print_values(final_caso5_columns, df_caso5)
print_values(['TIPO_PACIENTE', 'NEUMONIA','UCI'], df_caso5)
print(df_caso5.shape)

SEXO 
 0    60.451343
1    39.548657
Name: SEXO, dtype: float64

TIPO_PACIENTE 
 1    100.0
Name: TIPO_PACIENTE, dtype: float64

NEUMONIA 
 1    100.0
Name: NEUMONIA, dtype: float64

EDAD 
 60     2.625625
63     2.599955
65     2.581987
61     2.569152
59     2.540182
         ...   
117    0.000367
116    0.000367
110    0.000367
108    0.000367
118    0.000367
Name: EDAD, Length: 110, dtype: float64

EMBARAZO 
 0    99.661529
1     0.338471
Name: EMBARAZO, dtype: float64

DIABETES 
 0    66.237986
1    33.762014
Name: DIABETES, dtype: float64

EPOC 
 0    96.445139
1     3.554861
Name: EPOC, dtype: float64

ASMA 
 0    98.098622
1     1.901378
Name: ASMA, dtype: float64

INMUSUPR 
 0    97.976509
1     2.023491
Name: INMUSUPR, dtype: float64

HIPERTENSION 
 0    60.509283
1    39.490717
Name: HIPERTENSION, dtype: float64

CARDIOVASCULAR 
 0    95.788733
1     4.211267
Name: CARDIOVASCULAR, dtype: float64

OBESIDAD 
 0    77.417427
1    22.582573
Name: OBESIDAD, dtype: float64

RENAL

In [41]:
#CASO 6: necesidad de ventilador ANTES de DIAGNOSTICO de neumonia e ICU (sin filtro)

In [42]:
df_caso6 = df.copy()

conditions = ['EMBARAZO','RENAL_CRONICA', 'DIABETES', 'INMUSUPR', 'EPOC', 'OBESIDAD', 'HIPERTENSION', 'TABAQUISMO', 'CARDIOVASCULAR', 'ASMA','INTUBADO']
for condition in conditions:
    df_caso6 = df_caso6.loc[~((df_caso6[condition] == 97) | (df_caso6[condition] == 98) | (df_caso6[condition] == 99))]
    df_caso6.loc[df_caso6[condition] == 2, [condition]] = 0 #0 es NO, 1 es SI

final_caso6_columns = ['EDAD','EMBARAZO','RENAL_CRONICA','DIABETES','INMUSUPR','EPOC','OBESIDAD','HIPERTENSION','TABAQUISMO','CARDIOVASCULAR','ASMA','SEXO','INTUBADO','TIPO_PACIENTE']
df_caso6 = df_caso6[df_caso6.columns.intersection(final_caso6_columns)]

#print_values(final_caso6_columns, df_caso6)
print_values(['TIPO_PACIENTE','INTUBADO'], df_caso6)
print(df_caso6.shape)

SEXO 
 0    59.377524
1    40.622476
Name: SEXO, dtype: float64

TIPO_PACIENTE 
 1    100.0
Name: TIPO_PACIENTE, dtype: float64

INTUBADO 
 0    86.919277
1    13.080723
Name: INTUBADO, dtype: float64

EDAD 
 60     2.518065
61     2.486962
63     2.479970
65     2.466469
59     2.465987
         ...   
115    0.000241
110    0.000241
109    0.000241
106    0.000241
118    0.000241
Name: EDAD, Length: 114, dtype: float64

EMBARAZO 
 0    99.36084
1     0.63916
Name: EMBARAZO, dtype: float64

DIABETES 
 0    67.49035
1    32.50965
Name: DIABETES, dtype: float64

EPOC 
 0    96.521628
1     3.478372
Name: EPOC, dtype: float64

ASMA 
 0    97.994276
1     2.005724
Name: ASMA, dtype: float64

INMUSUPR 
 0    97.947503
1     2.052497
Name: INMUSUPR, dtype: float64

HIPERTENSION 
 0    61.552983
1    38.447017
Name: HIPERTENSION, dtype: float64

CARDIOVASCULAR 
 0    95.841239
1     4.158761
Name: CARDIOVASCULAR, dtype: float64

OBESIDAD 
 0    78.660343
1    21.339657
Name: OBESIDAD, dtype:

In [43]:
#CASO 7: necesidad de ventilador DESPUES de DIAGNOSTICO de neumonia e ICU (sin filtro)

In [44]:
df_caso7 = df.copy()

conditions = ['EMBARAZO','RENAL_CRONICA', 'DIABETES', 'INMUSUPR', 'EPOC', 'OBESIDAD', 'HIPERTENSION', 'TABAQUISMO', 'CARDIOVASCULAR', 'ASMA','UCI','NEUMONIA','INTUBADO']
for condition in conditions:
    df_caso7 = df_caso7.loc[~((df_caso7[condition] == 97) | (df_caso7[condition] == 98) | (df_caso7[condition] == 99))]
    df_caso7.loc[df_caso7[condition] == 2, [condition]] = 0 #0 es NO, 1 es SI

final_caso7_columns = ['EDAD','EMBARAZO','RENAL_CRONICA','DIABETES','INMUSUPR','EPOC','OBESIDAD','HIPERTENSION','TABAQUISMO','CARDIOVASCULAR','ASMA','SEXO','UCI','NEUMONIA','INTUBADO','TIPO_PACIENTE']
df_caso7 = df_caso7[df_caso7.columns.intersection(final_caso7_columns)]

#print_values(final_caso7_columns, df_caso7)
print_values(['TIPO_PACIENTE','UCI','NEUMONIA','INTUBADO'], df_caso7)
print(df_caso7.shape)

SEXO 
 0    59.377464
1    40.622536
Name: SEXO, dtype: float64

TIPO_PACIENTE 
 1    100.0
Name: TIPO_PACIENTE, dtype: float64

INTUBADO 
 0    86.920953
1    13.079047
Name: INTUBADO, dtype: float64

NEUMONIA 
 1    65.748936
0    34.251064
Name: NEUMONIA, dtype: float64

EDAD 
 60     2.518113
61     2.487010
63     2.480018
65     2.466516
59     2.466034
         ...   
115    0.000241
110    0.000241
109    0.000241
106    0.000241
118    0.000241
Name: EDAD, Length: 114, dtype: float64

EMBARAZO 
 0    99.360827
1     0.639173
Name: EMBARAZO, dtype: float64

DIABETES 
 0    67.490446
1    32.509554
Name: DIABETES, dtype: float64

EPOC 
 0    96.521561
1     3.478439
Name: EPOC, dtype: float64

ASMA 
 0    97.994238
1     2.005762
Name: ASMA, dtype: float64

INMUSUPR 
 0    97.947463
1     2.052537
Name: INMUSUPR, dtype: float64

HIPERTENSION 
 0    61.553447
1    38.446553
Name: HIPERTENSION, dtype: float64

CARDIOVASCULAR 
 0    95.841159
1     4.158841
Name: CARDIOVASCULAR, dt

In [45]:
#CASO 7.1: necesidad de ventilador DESPUES de DIAGNOSTICO de neumonia e ICU (con filtro)

In [46]:
df_caso7 = df.copy()

conditions = ['EMBARAZO','RENAL_CRONICA', 'DIABETES', 'INMUSUPR', 'EPOC', 'OBESIDAD', 'HIPERTENSION', 'TABAQUISMO', 'CARDIOVASCULAR', 'ASMA','UCI','NEUMONIA','INTUBADO']
for condition in conditions:
    df_caso7 = df_caso7.loc[~((df_caso7[condition] == 97) | (df_caso7[condition] == 98) | (df_caso7[condition] == 99))]
    df_caso7.loc[df_caso7[condition] == 2, [condition]] = 0 #0 es NO, 1 es SI

#elimina no neumonia e ICU
df_caso7.drop(df_caso7[(df_caso7['NEUMONIA'] == 0)].index, inplace = True) 
df_caso7.drop(df_caso7[(df_caso7['UCI'] == 0)].index, inplace = True) 

final_caso7_columns = ['EDAD','EMBARAZO','RENAL_CRONICA','DIABETES','INMUSUPR','EPOC','OBESIDAD','HIPERTENSION','TABAQUISMO','CARDIOVASCULAR','ASMA','SEXO','UCI','NEUMONIA','INTUBADO','TIPO_PACIENTE']
df_caso7 = df_caso7[df_caso7.columns.intersection(final_caso7_columns)]

#print_values(final_caso7_columns, df_caso7)
print_values(['TIPO_PACIENTE','UCI','NEUMONIA','INTUBADO'], df_caso7)
print(df_caso7.shape)

SEXO 
 0    63.825818
1    36.174182
Name: SEXO, dtype: float64

TIPO_PACIENTE 
 1    100.0
Name: TIPO_PACIENTE, dtype: float64

INTUBADO 
 1    56.686732
0    43.313268
Name: INTUBADO, dtype: float64

NEUMONIA 
 1    100.0
Name: NEUMONIA, dtype: float64

EDAD 
 60     2.935896
61     2.685958
58     2.650755
55     2.573309
62     2.562749
         ...   
5      0.014081
7      0.014081
99     0.010561
103    0.003520
116    0.003520
Name: EDAD, Length: 104, dtype: float64

EMBARAZO 
 0    99.348752
1     0.651248
Name: EMBARAZO, dtype: float64

DIABETES 
 0    65.596508
1    34.403492
Name: DIABETES, dtype: float64

EPOC 
 0    96.634632
1     3.365368
Name: EPOC, dtype: float64

ASMA 
 0    98.018094
1     1.981906
Name: ASMA, dtype: float64

INMUSUPR 
 0    97.644947
1     2.355053
Name: INMUSUPR, dtype: float64

HIPERTENSION 
 0    61.270109
1    38.729891
Name: HIPERTENSION, dtype: float64

CARDIOVASCULAR 
 0    95.237089
1     4.762911
Name: CARDIOVASCULAR, dtype: float64

OBESI

In [47]:
#CASO 7.2: necesidad de ventilador DESPUES de DIAGNOSTICO de neumonia e ICU (solo filtro UCI)

In [48]:
df_caso7 = df.copy()

conditions = ['EMBARAZO','RENAL_CRONICA', 'DIABETES', 'INMUSUPR', 'EPOC', 'OBESIDAD', 'HIPERTENSION', 'TABAQUISMO', 'CARDIOVASCULAR', 'ASMA','UCI','NEUMONIA','INTUBADO']
for condition in conditions:
    df_caso7 = df_caso7.loc[~((df_caso7[condition] == 97) | (df_caso7[condition] == 98) | (df_caso7[condition] == 99))]
    df_caso7.loc[df_caso7[condition] == 2, [condition]] = 0 #0 es NO, 1 es SI

#elimina no ICU
df_caso7.drop(df_caso7[(df_caso7['UCI'] == 0)].index, inplace = True) 

final_caso7_columns = ['EDAD','EMBARAZO','RENAL_CRONICA','DIABETES','INMUSUPR','EPOC','OBESIDAD','HIPERTENSION','TABAQUISMO','CARDIOVASCULAR','ASMA','SEXO','UCI','NEUMONIA','INTUBADO','TIPO_PACIENTE']
df_caso7 = df_caso7[df_caso7.columns.intersection(final_caso7_columns)]

#print_values(final_caso7_columns, df_caso7)
print_values(['TIPO_PACIENTE','UCI','NEUMONIA','INTUBADO'], df_caso7)
print(df_caso7.shape)

SEXO 
 0    62.920461
1    37.079539
Name: SEXO, dtype: float64

TIPO_PACIENTE 
 1    100.0
Name: TIPO_PACIENTE, dtype: float64

INTUBADO 
 1    52.009715
0    47.990285
Name: INTUBADO, dtype: float64

NEUMONIA 
 1    86.238616
0    13.761384
Name: NEUMONIA, dtype: float64

EDAD 
 60     2.859745
61     2.656345
58     2.638130
65     2.495446
55     2.489375
         ...   
98     0.018215
99     0.015179
101    0.015179
103    0.006072
116    0.003036
Name: EDAD, Length: 104, dtype: float64

EMBARAZO 
 0    99.110504
1     0.889496
Name: EMBARAZO, dtype: float64

DIABETES 
 0    66.472374
1    33.527626
Name: DIABETES, dtype: float64

EPOC 
 0    96.760777
1     3.239223
Name: EPOC, dtype: float64

ASMA 
 0    97.996357
1     2.003643
Name: ASMA, dtype: float64

INMUSUPR 
 0    97.641166
1     2.358834
Name: INMUSUPR, dtype: float64

HIPERTENSION 
 0    62.100789
1    37.899211
Name: HIPERTENSION, dtype: float64

CARDIOVASCULAR 
 0    95.270188
1     4.729812
Name: CARDIOVASCULAR, dt

In [49]:
#CASO 7.3: necesidad de ventilador DESPUES de DIAGNOSTICO de neumonia e ICU (solo filtro neumonia)

In [50]:
df_caso7 = df.copy()

conditions = ['EMBARAZO','RENAL_CRONICA', 'DIABETES', 'INMUSUPR', 'EPOC', 'OBESIDAD', 'HIPERTENSION', 'TABAQUISMO', 'CARDIOVASCULAR', 'ASMA','UCI','NEUMONIA','INTUBADO']
for condition in conditions:
    df_caso7 = df_caso7.loc[~((df_caso7[condition] == 97) | (df_caso7[condition] == 98) | (df_caso7[condition] == 99))]
    df_caso7.loc[df_caso7[condition] == 2, [condition]] = 0 #0 es NO, 1 es SI

#elimina no neumonia
df_caso7.drop(df_caso7[(df_caso7['NEUMONIA'] == 0)].index, inplace = True) 

final_caso7_columns = ['EDAD','EMBARAZO','RENAL_CRONICA','DIABETES','INMUSUPR','EPOC','OBESIDAD','HIPERTENSION','TABAQUISMO','CARDIOVASCULAR','ASMA','SEXO','UCI','NEUMONIA','INTUBADO','TIPO_PACIENTE']
df_caso7 = df_caso7[df_caso7.columns.intersection(final_caso7_columns)]

#print_values(final_caso7_columns, df_caso7)
print_values(['TIPO_PACIENTE','UCI','NEUMONIA','INTUBADO'], df_caso7)
print(df_caso7.shape)

SEXO 
 0    60.451343
1    39.548657
Name: SEXO, dtype: float64

TIPO_PACIENTE 
 1    100.0
Name: TIPO_PACIENTE, dtype: float64

INTUBADO 
 0    83.220571
1    16.779429
Name: INTUBADO, dtype: float64

NEUMONIA 
 1    100.0
Name: NEUMONIA, dtype: float64

EDAD 
 60     2.625625
63     2.599955
65     2.581987
61     2.569152
59     2.540182
         ...   
117    0.000367
116    0.000367
110    0.000367
108    0.000367
118    0.000367
Name: EDAD, Length: 110, dtype: float64

EMBARAZO 
 0    99.661529
1     0.338471
Name: EMBARAZO, dtype: float64

DIABETES 
 0    66.237986
1    33.762014
Name: DIABETES, dtype: float64

EPOC 
 0    96.445139
1     3.554861
Name: EPOC, dtype: float64

ASMA 
 0    98.098622
1     1.901378
Name: ASMA, dtype: float64

INMUSUPR 
 0    97.976509
1     2.023491
Name: INMUSUPR, dtype: float64

HIPERTENSION 
 0    60.509283
1    39.490717
Name: HIPERTENSION, dtype: float64

CARDIOVASCULAR 
 0    95.788733
1     4.211267
Name: CARDIOVASCULAR, dtype: float64

OBESI