In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, accuracy_score, confusion_matrix, f1_score,classification_report

In [2]:
#Cargar CSV, formatear columna de fechas
pd.set_option('display.max_columns', None)
df=pd.read_csv('covid/covid.csv',encoding='UTF-8',parse_dates=[3,4],  dayfirst=True)

In [3]:
df.head()

Unnamed: 0,id,sex,patient_type,entry_date,date_symptoms,date_died,intubed,pneumonia,age,pregnancy,diabetes,copd,asthma,inmsupr,hypertension,other_disease,cardiovascular,obesity,renal_chronic,tobacco,contact_other_covid,covid_res,icu
0,16169f,2,1,2020-05-04,2020-05-02,9999-99-99,97,2,27,97,2,2,2,2,2,2,2,2,2,2,2,1,97
1,1009bf,2,1,2020-03-19,2020-03-17,9999-99-99,97,2,24,97,2,2,2,2,2,2,2,2,2,2,99,1,97
2,167386,1,2,2020-04-06,2020-04-01,9999-99-99,2,2,54,2,2,2,2,2,2,2,2,1,2,2,99,1,2
3,0b5948,2,2,2020-04-17,2020-04-10,9999-99-99,2,1,30,97,2,2,2,2,2,2,2,2,2,2,99,1,2
4,0d01b5,1,2,2020-04-13,2020-04-13,22-04-2020,2,2,60,2,1,2,2,2,1,2,1,2,2,2,99,1,2


In [4]:
#Convertir datos
df['days_prior_to_treatment'] = (df['entry_date'] - df['date_symptoms']).dt.days
df['dead']='1'
df.loc[df['date_died']=='9999-99-99','dead']='0'

In [5]:
#Eliminar columnas no utiles
df.drop(['id','patient_type','pregnancy','contact_other_covid','other_disease','entry_date','date_symptoms','date_died'],inplace=True,axis=1)

In [6]:
#Convertir todas las columnas a numericas de 8 bits
df = df.astype('int8')
#Quitar valores fuera de rango
df=df.loc[(df['sex']<=2)&(df['intubed']<=2)&(df['pneumonia']<=2)&(df['diabetes']<=2)&(df['copd']<=2)&(df['asthma']<=2)&(df['inmsupr']<=2)&(df['hypertension']<=2)&(df['cardiovascular']<=2)&(df['obesity']<=2)&(df['renal_chronic']<=2)&(df['tobacco']<=2)&(df['covid_res']<=2)&(df['dead']<=2)&(df['icu']<=2)&(df['days_prior_to_treatment']>=0),['sex','age','inmsupr','pneumonia','diabetes','asthma','copd','hypertension','cardiovascular','renal_chronic','obesity','tobacco','days_prior_to_treatment','covid_res','intubed','icu','dead']]
#Reemplazar valores 2 por 0
df.loc[df['sex']==2,'sex']=0
df.loc[df['inmsupr']==2,'inmsupr']=0
df.loc[df['pneumonia']==2,'pneumonia']=0
df.loc[df['diabetes']==2,'diabetes']=0
df.loc[df['asthma']==2,'asthma']=0
df.loc[df['copd']==2,'copd']=0
df.loc[df['hypertension']==2,'hypertension']=0
df.loc[df['cardiovascular']==2,'cardiovascular']=0
df.loc[df['renal_chronic']==2,'renal_chronic']=0
df.loc[df['obesity']==2,'obesity']=0
df.loc[df['tobacco']==2,'tobacco']=0
df.loc[df['intubed']==2,'intubed']=0
df.loc[df['icu']==2,'icu']=0
df.loc[df['covid_res']==2,'covid_res']=0

In [7]:
#Seleccion de filas con valor covid_res = 1
df=df.loc[df['covid_res']==1,['sex','age','inmsupr','pneumonia','diabetes','asthma','copd','hypertension','cardiovascular','renal_chronic','obesity','tobacco','days_prior_to_treatment','covid_res','intubed','icu','dead']]
#Seleccion de caracteristicas
X = df.drop(['icu','covid_res','dead','intubed'], axis=1)
y = df['icu']

In [8]:
#Escalar
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [9]:
#Sobremuestreo
sm = SMOTE(random_state=5)
#particion de datos
X_res, y_res = sm.fit_resample(X, y)
X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.30, random_state=5, stratify=y_res)

In [10]:
#training y prediccion
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)
y_pred = knn_model.predict(X_test)

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


In [11]:
#Precisión del modelo
accuracy = accuracy_score(y_test, y_pred)
print("Precisión del modelo: {:.2f}%".format(accuracy*100))

Precisión del modelo: 76.60%


In [13]:
print(confusion_matrix(y_test, y_pred))

[[14066  4468]
 [ 4207 14328]]


In [14]:
df.head()

Unnamed: 0,sex,age,inmsupr,pneumonia,diabetes,asthma,copd,hypertension,cardiovascular,renal_chronic,obesity,tobacco,days_prior_to_treatment,covid_res,intubed,icu,dead
2,1,54,0,0,0,0,0,0,0,0,1,0,5,1,0,0,0
3,0,30,0,1,0,0,0,0,0,0,0,0,7,1,0,0,0
4,1,60,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1
5,0,47,0,1,1,0,0,0,0,0,0,0,0,1,0,1,1
6,0,63,0,0,0,0,0,1,0,0,0,0,9,1,0,0,0
