In [1]:
import argparse
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import warnings
warnings.filterwarnings('ignore')

In [2]:
"""Loading open-access database COVID-19 Mexico""" 
# NOTE(review): absolute, machine-specific path -- the notebook only runs where
# this exact file exists. Consider a configurable data directory.
raw_data = 'C:/Users/Salvador/Modelo_COVID19/Origen_IIMAS_SISVER_UNAM_COVID19/SISVER_Curación_IIMAS_2021_2-3/EPI_2021_2-3-2021.csv'
# Load the complete CSV into memory and report its row count.
data = pd.read_csv(raw_data)
print("Total records", len(data))

Total records 5490290


In [3]:
!cd

C:\Users\Salvador\Modelo_COVID19\Libretas manuscrito\BCM Infectius diseases


In [4]:
"""Functions def"""
def read_data(raw_clinical_note):
    """Read a CSV source into a DataFrame.

    Parameters
    ----------
    raw_clinical_note : str, path or file-like
        Anything accepted by ``pandas.read_csv`` as its first argument.

    Returns
    -------
    pandas.DataFrame
        The parsed table. The first row is taken as the header and
        missing-value detection is left switched on.
    """
    return pd.read_csv(raw_clinical_note, na_filter=True, header=0)

def delete_columns(data, columns):
    """Return a new DataFrame without the given columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Source table; it is not modified.
    columns : label or list of labels
        Column labels to remove. A label that is not present in ``data``
        makes ``DataFrame.drop`` raise ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        ``data`` with the requested columns removed.
    """
    return data.drop(columns=columns)

def estimation_of_elapsed_days(data):
    """Add ``dias_trans``: days between symptom onset and admission.

    ``FECINISI`` and ``FECINGRE`` are parsed with ``pandas.to_datetime`` and
    their difference (``FECINGRE - FECINISI``) is stored, as a float number
    of days, in a new ``dias_trans`` column.

    The work is done on a copy, so the frame passed in is left untouched.
    This matters because callers typically pass a filtered slice of a
    larger frame, and writing columns into such a slice is exactly what
    pandas' SettingWithCopyWarning guards against.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``FECINISI`` and ``FECINGRE``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with both date columns converted to datetimes and
        the extra ``dias_trans`` column.

    Raises
    ------
    KeyError
        If either date column is missing.
    """
    result = data.copy()
    # NOTE(review): no explicit format is given, so pandas infers it; confirm
    # the source files are not day-first strings that could be misread.
    onset = pd.to_datetime(result['FECINISI'])
    admission = pd.to_datetime(result['FECINGRE'])
    result['FECINISI'] = onset
    result['FECINGRE'] = admission
    # Dividing a timedelta by one day yields fractional days as float.
    result['dias_trans'] = (admission - onset) / np.timedelta64(1, 'D')
    return result

def encoding(data):
    """Binarise the categorical clinical columns and build the label.

    Every column listed in ``equals_one`` is replaced by ``1`` where its
    value equals ``1`` and ``0`` otherwise (missing values therefore become
    ``0``). ``ESTAEMBA`` is the single exception and maps the value ``2`` to
    ``1``. A new ``MORTALIDAD`` column is ``1`` where ``EVOLUCI == 5`` and
    ``0`` elsewhere; ``EVOLUCI`` itself is left as is.

    The function works on a copy, so the frame passed in is not modified.
    Callers pass filtered slices of a larger frame, and writing into such a
    slice is what pandas' SettingWithCopyWarning guards against.

    Because the result is 0/1, calling the function again on its own output
    is not a no-op: ``ESTAEMBA`` (tested against ``2``) would become all 0.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain every column named below plus ``EVOLUCI``.

    Returns
    -------
    pandas.DataFrame
        Encoded copy of ``data`` with the extra ``MORTALIDAD`` column.

    Raises
    ------
    KeyError
        If any required column is missing.
    """
    equals_one = (
        'SEXO',
        # comorbidities
        'DIABETES', 'EPOC', 'ASMA', 'INMUSUPR', 'HIPERTEN', 'VIH-SIDA',
        'ENFCARDI', 'OBESIDAD', 'INSRENCR', 'TABAQUIS',
        # symptoms
        'FIEBRE', 'TOS', 'ODINOGIA', 'DISNEA', 'IRRITABI', 'DIARREA',
        'DOTORACI', 'CALOFRIOS', 'CEFALEA', 'MIALGIAS', 'ARTRAL', 'ATAEDOGE',
        'RINORREA', 'POLIPNEA', 'VOMITO', 'DOLABDO', 'CONJUN', 'CIANOSIS',
        'INISUBIS', 'ANOSMIA', 'DISGEUSIA',
        # care / other
        'UCI', 'INTUBADO', 'ANTIPIRETICOS', 'PUERPERIO',
    )

    encoded = data.copy()
    for column in equals_one:
        # A boolean comparison cast to int gives the 0/1 flag directly.
        encoded[column] = (encoded[column] == 1).astype(int)

    # NOTE(review): unlike every other flag this one treats the value 2 as
    # positive -- confirm against the data dictionary that this is intended.
    encoded['ESTAEMBA'] = (encoded['ESTAEMBA'] == 2).astype(int)

    # Label
    encoded['MORTALIDAD'] = (encoded['EVOLUCI'] == 5).astype(int)
    return encoded


# Patient selection

In [5]:
"""Patients reported with COVID-19"""
# Keep only the rows whose final result code (RESDEFIN) equals 2.
# NOTE(review): the meaning of code 2 comes from the data dictionary -- confirm.
pacientes_cov = data.loc[data["RESDEFIN"] == 2]

In [6]:
# Show the column labels available in the COVID-19 subset.
pacientes_cov.columns

Index(['ORIGEN', 'SECTOR', 'ID_REGISTRO', 'ENTIDAD', 'UNIDAD', 'FECHREG',
       'FOLIO SINAVE', 'SEXO', 'ENTNACI', 'ENTRESI', 'MPIORESI', 'LOCRESI',
       'TIPACIEN', 'EVOLUCI', 'FEGRESO', 'FECDEF', 'DEFPORINF', 'DEFVERIFI',
       'INTUBADO', 'DIGCLINE', 'FECNACI', 'EDAD', 'NACIONA', 'ESTAEMBA',
       'ESINDIGE', 'HABLEIND', 'OCUPACIO', 'SERINGRE', 'FECINGRE', 'FECINISI',
       'DIAGPROB', 'FIEBRE', 'TOS', 'ODINOGIA', 'DISNEA', 'IRRITABI',
       'DIARREA', 'DOTORACI', 'CALOFRIOS', 'CEFALEA', 'MIALGIAS', 'ARTRAL',
       'ATAEDOGE', 'RINORREA', 'POLIPNEA', 'VOMITO', 'DOLABDO', 'CONJUN',
       'CIANOSIS', 'INISUBIS', 'ANOSMIA', 'DISGEUSIA', 'DIABETES', 'EPOC',
       'ASMA', 'INMUSUPR', 'HIPERTEN', 'VIH-SIDA', 'OTRACON', 'ENFCARDI',
       'OBESIDAD', 'INSRENCR', 'TABAQUIS', 'RECTRATA', 'TXCROBIA', 'TXANTIVI',
       'ANTIVIRA', 'FECINITXANTIVI', 'CONOCASO', 'CONTAVES', 'CONCERDO',
       'CONANIMA', 'VACUNADO', 'FECVAEST', 'TOMMUE', 'LABORA', 'RESDEFIN',
       'ESMIGRA', 'PAISNA

In [7]:
# Row count of the COVID-19 subset.
print("Patients COVID-19", len(pacientes_cov))

Patients COVID-19 1578228


In [8]:
"""Patients from Morelos"""
# Restrict to the records whose ENTIDAD code is 17.
# NOTE(review): 17 is assumed to be the state code for Morelos -- confirm.
pacientes_cov_morelos = pacientes_cov.loc[pacientes_cov["ENTIDAD"] == 17]

In [9]:
# Row count after the state filter.
print("Patients from Morelos", len(pacientes_cov_morelos))

Patients from Morelos 12103


In [10]:
# Binarise the clinical columns and add the MORTALIDAD label.
# Not idempotent: the name is rebound to the encoded frame, and encoding()
# tests ESTAEMBA against 2, so running this cell a second time on the
# already 0/1 data would turn ESTAEMBA into all zeros.
pacientes_cov_morelos = encoding(pacientes_cov_morelos)

In [11]:
"""Patients without intubation procedure"""
# INTUBADO is already 0/1 here (encoded above); keep the non-intubated rows.
pacientes_triage = pacientes_cov_morelos.loc[pacientes_cov_morelos["INTUBADO"] == 0]

In [12]:
# Row count after excluding intubated patients.
print("Patients without intubation procedure", len(pacientes_triage))

Patients without intubation procedure 11703


In [13]:
"""Patients not admission to intensive care unit"""
# UCI is already 0/1 here (encoded above); keep the rows with UCI == 0.
pacientes_triage_final = pacientes_triage.loc[pacientes_triage["UCI"] == 0]

In [14]:
# Row count after excluding intensive-care admissions.
print("Patients not admission to intensive care unit", len(pacientes_triage_final))

Patients not admission to intensive care unit 11564


In [15]:
# Columns removed from the study table (passed to delete_columns below).
# Each label appears exactly once. The date columns FECINGRE / FECINISI are
# in this list, so the elapsed-days feature has to be computed before the
# columns are dropped.
columns = [
    'ORIGEN', 'SECTOR', 'ID_REGISTRO', 'ENTIDAD', 'UNIDAD', 'FECHREG',
    'FOLIO SINAVE', 'ENTNACI', 'ENTRESI', 'MPIORESI', 'ESTAEMBA',
    'TIPACIEN', 'FEGRESO', 'FECDEF', 'DEFPORINF', 'DEFVERIFI', 'DIGCLINE', 'INTUBADO',
    'FECNACI', 'NACIONA', 'ESINDIGE', 'CONOCASO', 'FECINITXANTIVI', 'UCI', 'EVOLUCI',
    'HABLEIND', 'OCUPACIO', 'SERINGRE', 'CONTAVES', 'VIH-SIDA',
    'DIAGPROB', 'CONANIMA', 'VACUNADO', 'FECVAEST', 'TOMMUE', 'LABORA', 'OTRACON',
    'ESMIGRA', 'PAISNAL', 'PAISORI', 'FINGMEX', 'PAISTRAN1', 'PROTOCOLO', 'CONCERDO',
    'ANTIVIRA', 'ORIGEN_DATOS', 'ASODIC', 'ANTIGENCOVID', 'TIPO_VAC_COV', 'FEC_VAC_COV',
    'LINAJE', 'VIAJE1', 'VIAJE2', 'VIAJE3', 'VIAJE4', 'VIAJE5',
    'VACUNA_COV', 'FECMUEANT', 'LOCRESI', 'RECTRATA', 'TXCROBIA', 'PUERPERIO',
    'TXANTIVI', 'RESDEFIN', 'CLASCOVID19', 'FECINGRE', 'FECINISI',
]

In [16]:
"""Estimate in days the time of elapsed from the onset of disease symptoms to the start of medical care"""
# Adds the dias_trans column (FECINGRE - FECINISI, in days).
# Must run before the column-dropping cell: that cell removes FECINGRE and
# FECINISI and rebinds the same name, so re-running this cell afterwards
# raises KeyError.
pacientes_triage_final = estimation_of_elapsed_days(pacientes_triage_final)

In [17]:
"""Variables"""
# Drop every label in `columns`, leaving only the study variables.
# The name is rebound to the reduced frame, so this cell cannot be run twice
# in a row (the labels would no longer exist and drop raises KeyError).
pacientes_triage_final = delete_columns(pacientes_triage_final,columns)

In [18]:
"""Study patients"""
# Plain alias: both names refer to the same DataFrame object (no copy is made).
data_final_mor = pacientes_triage_final

In [19]:
# Show the column labels that remain after dropping the unused columns.
data_final_mor.columns

Index(['SEXO', 'EDAD', 'FIEBRE', 'TOS', 'ODINOGIA', 'DISNEA', 'IRRITABI',
       'DIARREA', 'DOTORACI', 'CALOFRIOS', 'CEFALEA', 'MIALGIAS', 'ARTRAL',
       'ATAEDOGE', 'RINORREA', 'POLIPNEA', 'VOMITO', 'DOLABDO', 'CONJUN',
       'CIANOSIS', 'INISUBIS', 'ANOSMIA', 'DISGEUSIA', 'DIABETES', 'EPOC',
       'ASMA', 'INMUSUPR', 'HIPERTEN', 'ENFCARDI', 'OBESIDAD', 'INSRENCR',
       'TABAQUIS', 'ANTIPIRETICOS', 'MORTALIDAD', 'dias_trans'],
      dtype='object')

In [20]:
# Row count of the final study cohort.
print("Study patients", len(data_final_mor))

Study patients 11564


In [21]:
# Replace the original column codes with English display names.
# rename() returns a new frame; labels missing from the mapping are kept.
data_final_mor = data_final_mor.rename(
    columns={
        'SEXO': 'Sex',
        'EDAD': 'Age',
        'FIEBRE': 'Fever',
        'TOS': 'Cough',
        'ODINOGIA': 'Odynophagia',
        'DISNEA': 'Dyspnea',
        'IRRITABI': 'Irritability',
        'DIARREA': 'Diarrhea',
        'DOTORACI': 'Chest pain',
        'CALOFRIOS': 'Chills',
        'CEFALEA': 'Headache',
        'MIALGIAS': 'Myalgia',
        'ARTRAL': 'Arthralgia',
        'ATAEDOGE': 'General discomfort',
        'RINORREA': 'Rhinorrhea',
        'POLIPNEA': 'Polypnea',
        'VOMITO': 'Vomiting',
        'DOLABDO': 'Abdominal pain',
        'CONJUN': 'Conjunctivitis',
        'CIANOSIS': 'Cyanosis',
        'INISUBIS': 'Sudden onset of symptoms',
        'ANOSMIA': 'Anosmia',
        'DISGEUSIA': 'Dysgeusia',
        'DIABETES': 'Diabetes',
        'EPOC': 'COPD',
        'ASMA': 'Asthma',
        'INMUSUPR': 'Immunosuppression',
        'HIPERTEN': 'Hypertension',
        'ENFCARDI': 'Cardiovascular disease',
        'OBESIDAD': 'Obesity',
        'INSRENCR': 'CKD',
        'TABAQUIS': 'Smoking',
        'ANTIPIRETICOS': 'Use of antipyretics',
        'dias_trans': 'Days elapsed',
        'MORTALIDAD': 'Mortality',
    }
)

In [22]:
"""Study variables"""
# Show the column labels after renaming.
data_final_mor.columns

Index(['Sex', 'Age', 'Fever', 'Cough', 'Odynophagia', 'Dyspnea',
       'Irritability', 'Diarrhea', 'Chest pain', 'Chills', 'Headache',
       'Myalgia', 'Arthralgia', 'General discomfort', 'Rhinorrhea', 'Polypnea',
       'Vomiting', 'Abdominal pain', 'Conjunctivitis', 'Cyanosis',
       'Sudden onset of symptoms', 'Anosmia', 'Dysgeusia', 'Diabetes', 'COPD',
       'Asthma', 'Immunosuppression', 'Hypertension', 'Cardiovascular disease',
       'Obesity', 'CKD', 'Smoking', 'Use of antipyretics', 'Mortality',
       'Days elapsed'],
      dtype='object')

In [23]:
# data_final_mor is already a DataFrame (it is the result of rename()), so
# this call only wraps it under a second name.
df = pd.DataFrame(data_final_mor)

In [24]:
"""Save datafreme"""
# Writes to a relative path, i.e. into the kernel's current working directory.
# With to_csv's defaults the row index is written as an unnamed first column;
# NOTE(review): confirm downstream readers expect that extra column.
df.to_csv('data_final_mor.csv')