#Análise de Pacientes com Coronavírus 🦠

### Etapa 01: Importação do Conjunto de Dados

In [18]:
# Importação de bibliotecas
import pandas as pd
import numpy as np

In [19]:
# Importando o conjunto de dados dos pacientes
url = '/content/covid_data.csv'
dados = pd.read_csv(url)

In [20]:
# Verificando se a leitura foi feita com sucesso
dados.head()

Unnamed: 0,USMER,MEDICAL_UNIT,SEX,PATIENT_TYPE,DATE_DIED,INTUBED,PNEUMONIA,AGE,PREGNANT,DIABETES,...,ASTHMA,INMSUPR,HIPERTENSION,OTHER_DISEASE,CARDIOVASCULAR,OBESITY,RENAL_CHRONIC,TOBACCO,CLASIFFICATION_FINAL,ICU
0,2,1,1,1,03/05/2020,97,1,65,2,2,...,2,2,1,2,2,2,2,2,3,97
1,2,1,2,1,03/06/2020,97,1,72,97,2,...,2,2,1,2,2,1,1,2,5,97
2,2,1,2,2,09/06/2020,1,2,55,97,1,...,2,2,2,2,2,2,2,2,3,2
3,2,1,1,1,12/06/2020,97,2,53,2,2,...,2,2,2,2,2,2,2,2,7,97
4,2,1,2,1,21/06/2020,97,2,68,97,1,...,2,2,1,2,2,2,2,2,3,97


###Etapa 02: Exploração do Conjunto de Dados

In [21]:
# Obtendo as informações gerais do dataframe
dados.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048575 entries, 0 to 1048574
Data columns (total 21 columns):
 #   Column                Non-Null Count    Dtype 
---  ------                --------------    ----- 
 0   USMER                 1048575 non-null  int64 
 1   MEDICAL_UNIT          1048575 non-null  int64 
 2   SEX                   1048575 non-null  int64 
 3   PATIENT_TYPE          1048575 non-null  int64 
 4   DATE_DIED             1048575 non-null  object
 5   INTUBED               1048575 non-null  int64 
 6   PNEUMONIA             1048575 non-null  int64 
 7   AGE                   1048575 non-null  int64 
 8   PREGNANT              1048575 non-null  int64 
 9   DIABETES              1048575 non-null  int64 
 10  COPD                  1048575 non-null  int64 
 11  ASTHMA                1048575 non-null  int64 
 12  INMSUPR               1048575 non-null  int64 
 13  HIPERTENSION          1048575 non-null  int64 
 14  OTHER_DISEASE         1048575 non-null  int64 
 15

In [22]:
# Descobrindo a quantidade de valores nulos (NaN)
dados.isnull().sum()

Unnamed: 0,0
USMER,0
MEDICAL_UNIT,0
SEX,0
PATIENT_TYPE,0
DATE_DIED,0
INTUBED,0
PNEUMONIA,0
AGE,0
PREGNANT,0
DIABETES,0


In [23]:
# Verificando os valores da única coluna do tipo 'object'
dados['DATE_DIED'].unique()

array(['03/05/2020', '03/06/2020', '09/06/2020', '12/06/2020',
       '21/06/2020', '9999-99-99', '26/02/2020', '05/04/2020',
       '08/05/2020', '20/05/2020', '17/07/2020', '13/01/2020',
       '22/01/2020', '29/01/2020', '13/02/2020', '18/02/2020',
       '19/02/2020', '20/02/2020', '24/02/2020', '04/03/2020',
       '07/03/2020', '12/03/2020', '14/03/2020', '18/03/2020',
       '27/03/2020', '28/03/2020', '29/03/2020', '02/04/2020',
       '06/04/2020', '07/04/2020', '08/04/2020', '09/04/2020',
       '10/04/2020', '11/04/2020', '12/04/2020', '13/04/2020',
       '14/04/2020', '15/04/2020', '16/04/2020', '17/04/2020',
       '18/04/2020', '20/04/2020', '21/04/2020', '22/04/2020',
       '23/04/2020', '24/04/2020', '25/04/2020', '26/04/2020',
       '27/04/2020', '28/04/2020', '29/04/2020', '30/04/2020',
       '01/05/2020', '02/05/2020', '04/05/2020', '05/05/2020',
       '06/05/2020', '07/05/2020', '09/05/2020', '10/05/2020',
       '11/05/2020', '12/05/2020', '13/05/2020', '14/05

###Etapa 03: Manipulação do Conjunto de Dados

In [24]:
# Substituindo os valores de 97 e 99 por valores nulos (NaN)
valores_trocar = [97, 99]
dados.replace(valores_trocar, np.nan, inplace=True)

In [25]:
# Verificando a alteração
dados['INTUBED']

Unnamed: 0,INTUBED
0,
1,
2,1.0
3,
4,
...,...
1048570,
1048571,2.0
1048572,
1048573,


In [26]:
# Removendo os registros com valores nulos (NaN)
dados.dropna(inplace=True)

In [27]:
# Obtendo os valores da coluna de gênero
dados['SEX'].unique()

array([1])

In [28]:
# Trocando os valores da coluna de gênero por 'object' (string)
dados['SEX'] = dados['SEX'].apply(lambda x: 'Female' if x == 1 else 'Man')
dados.head()

Unnamed: 0,USMER,MEDICAL_UNIT,SEX,PATIENT_TYPE,DATE_DIED,INTUBED,PNEUMONIA,AGE,PREGNANT,DIABETES,...,ASTHMA,INMSUPR,HIPERTENSION,OTHER_DISEASE,CARDIOVASCULAR,OBESITY,RENAL_CHRONIC,TOBACCO,CLASIFFICATION_FINAL,ICU
5,2,1,Female,2,9999-99-99,2.0,1.0,40.0,2.0,2,...,2,2,2,2,2,2,2,2,3,2.0
8,2,1,Female,2,9999-99-99,2.0,2.0,37.0,2.0,1,...,2,2,1,2,2,1,2,2,3,2.0
9,2,1,Female,2,9999-99-99,2.0,2.0,25.0,2.0,2,...,2,2,2,2,2,2,2,2,3,2.0
16,2,1,Female,2,9999-99-99,2.0,1.0,80.0,2.0,2,...,2,2,1,2,2,2,2,2,3,1.0
56,2,1,Female,2,9999-99-99,1.0,1.0,58.0,2.0,2,...,2,2,1,2,1,1,2,2,7,1.0


In [32]:
# Removendo a coluna de estado do paciente
dados.drop(columns=['PATIENT_TYPE'], inplace=True)
dados.head()
# Motivo: a coluna apenas possui o valor numérico de 2

Unnamed: 0,USMER,MEDICAL_UNIT,SEX,DATE_DIED,INTUBED,PNEUMONIA,AGE,PREGNANT,DIABETES,COPD,ASTHMA,INMSUPR,HIPERTENSION,OTHER_DISEASE,CARDIOVASCULAR,OBESITY,RENAL_CHRONIC,TOBACCO,CLASIFFICATION_FINAL,ICU
5,2,1,Female,9999-99-99,2.0,1.0,40.0,2.0,2,2,2,2,2,2,2,2,2,2,3,2.0
8,2,1,Female,9999-99-99,2.0,2.0,37.0,2.0,1,2,2,2,1,2,2,1,2,2,3,2.0
9,2,1,Female,9999-99-99,2.0,2.0,25.0,2.0,2,2,2,2,2,2,2,2,2,2,3,2.0
16,2,1,Female,9999-99-99,2.0,1.0,80.0,2.0,2,2,2,2,1,2,2,2,2,2,3,1.0
56,2,1,Female,9999-99-99,1.0,1.0,58.0,2.0,2,2,2,2,1,2,1,1,2,2,7,1.0
