# COVID-19 data analysis
Source: https://www.saopaulo.sp.gov.br/planosp/simi/dados-abertos/

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import metrics
import matplotlib.pyplot as plt
import os
from joblib import dump, load
import numpy as np

In [None]:
# Columns needed for the descriptive statistics (gender, diagnosis, age, death).
columns = [
    'Genero',
    'Diagnostico Covid19',
    'Idade',
    'Obito',
]

# The CSV export is semicolon-delimited; keep the full table in `source` and
# a narrow selection of it in `df`.
source = pd.read_csv('./data.csv', sep=';')
df = source[columns]

In [None]:
# Show the full raw table. `display` is not imported in this file —
# presumably the IPython/Jupyter builtin; confirm before running as a script.
display(source)

In [None]:
# Show the four-column selection used for the charts. `display` is not
# imported in this file — presumably the IPython/Jupyter builtin.
display(df)

In [None]:
# Discard every row with a missing value in any selected column
# (dropna defaults: how="any", axis=0).
df = df.dropna()

In [None]:
# Restrict to confirmed cases once, then count each gender with boolean masks.
confirmed = df[df['Diagnostico Covid19'] == 'CONFIRMADO']
confirmed_gender = confirmed['Genero']

masc_total = int((confirmed_gender == 'MASCULINO').sum())
fem_total = int((confirmed_gender == 'FEMININO').sum())
# Anything that is neither male nor female counts as "not informed".
ni = int((~confirmed_gender.isin(['FEMININO', 'MASCULINO'])).sum())
total = len(confirmed)

# Share of confirmed cases per gender, as fractions of 1.
masc = masc_total / total
fem = fem_total / total
ni_ = ni / total

In [None]:
# Matplotlib font dictionaries: titles are dark red, labels are black;
# both use a 12 pt serif face at normal weight.
font_title = dict(family='serif', color='darkred', weight='normal', size=12)

font_label = dict(family='serif', color='black', weight='normal', size=12)

In [None]:
# Pie chart of confirmed cases split by gender.
# The style sheet is selected BEFORE the figure is created: style settings
# only affect objects created after the call, so applying it afterwards
# leaves the figure partly unstyled on the first run and makes a re-run of
# the cell look different.
plt.style.use('ggplot')
fig, ax = plt.subplots()
ax.pie(
    [masc_total, fem_total, ni],
    labels=['Male', 'Female', 'Undefined'],
    autopct='%1.5f%%',  # percentage label with five decimal places
    normalize=True,
    textprops=font_label,
)
# The explicit fontsize takes precedence over the size stored in font_title.
plt.title('Contamination by gender', loc='left', fontsize=18, fontdict=font_title)
plt.show()

# Mortality

In [None]:
# Cast the age column to integers. DataFrame.astype returns a new frame, which
# avoids assigning through attribute access on a frame that was derived by
# filtering (pandas may flag that pattern with SettingWithCopyWarning).
df = df.astype({'Idade': int})

In [None]:
# Age-specific mortality (%) for each 10-year bracket [i, i + 10), i = 0..130.
# The denominator is the number of records in the bracket itself, so every
# value is the share of that age group that died. Dividing by the size of the
# whole table would instead give each bracket's contribution to overall
# mortality. Empty brackets are reported as 0 instead of dividing by zero.
y = []

ages = df['Idade']
died = df['Obito'] == 1

for i in range(0, 140, 10):
    in_bracket = (ages >= i) & (ages < i + 10)
    bracket_size = int(in_bracket.sum())
    count_obitos = int((in_bracket & died).sum())
    mortalidade = count_obitos / bracket_size if bracket_size else 0.0
    y.append(mortalidade * 100)

In [None]:
# Horizontal bar chart of the per-bracket values in `y`.
# The style sheet is selected BEFORE the figure is created: style settings
# only affect objects created after the call, so applying it afterwards
# leaves the figure partly unstyled on the first run.
plt.style.use('ggplot')

fig, ax = plt.subplots()

# One bar per 10-year bracket; align='edge' starts each bar at the bracket's
# lower age and height=8 leaves a gap before the next bracket.
ax.barh(range(0, 140, 10), y, height=8, align='edge')
ax.set_ylabel('Age')
ax.set_xlabel('Mortality Rate (%)')
plt.title('Mortality Rate by ages', loc='left', fontsize=18)

plt.show()

# ML model to predict the chance of death

In [None]:
# Build the modelling table from the raw data without the three columns that
# are not used as features. `drop` returns a new frame, so `source` itself is
# left untouched.
data = source.drop(
    columns=[
        'Outros Fatores De Risco',
        'Data Inicio Sintomas',
        'Municipio',
    ]
)

In [None]:
# Encode the yes/no comorbidity flags as 1/0. Any value that is not a key of
# the mapping becomes NaN.
yes_no = {'SIM': 1, 'NÃO': 0}
binary_columns = [
    'Asma',
    'Diabetes',
    'Cardiopatia',
    'Doenca Hematologica',
    'Doenca Hepatica',
    'Doenca Neurologica',
    'Doenca Renal',
    'Obesidade',
    'Imunodepressao',
    'Pneumopatia',
    'Puérpera',
    'Síndrome De Down',
]
for column in binary_columns:
    data[column] = data[column].map(yes_no)

# Only confirmed diagnoses get a value; every other diagnosis turns into NaN.
data['Diagnostico Covid19'] = data['Diagnostico Covid19'].map({'CONFIRMADO': 1})
# Male -> 1, female -> 0; any other gender value turns into NaN.
data['Genero'] = data['Genero'].map({'MASCULINO': 1, 'FEMININO': 0})

In [None]:
# Keep only fully populated rows (dropna defaults: how="any", axis=0).
data = data.dropna()

In [None]:
# List the columns that still contain at least one missing value.
data.columns[data.isnull().any(axis=0)]

In [None]:
# Separate the label ('Obito') from the features. Both right-hand sides are
# evaluated against the current `data` before either name is rebound.
target, data = data['Obito'], data.drop(columns='Obito')

In [None]:
# Hold out 30% of the rows for evaluation. A fixed random_state makes the
# split, and therefore the reported metrics and the saved model, reproducible
# across notebook runs.
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.3, random_state=42
)

In [None]:
from sklearn.impute import SimpleImputer

# Replace NaN entries with the column mean.
fill_missing_mean = SimpleImputer(strategy='mean', missing_values=np.nan)

# Learn the column means from the training split only, then apply those same
# means to the test split. Re-fitting on the test split would use statistics
# from the evaluation data and fill the two splits with different values.
X_train = fill_missing_mean.fit_transform(X_train)
X_test = fill_missing_mean.transform(X_test)

In [None]:
from sklearn.naive_bayes import ComplementNB
# Complement Naive Bayes classifier with its default hyper-parameters.
# NOTE(review): this estimator presumably expects non-negative feature
# values — confirm that every remaining column satisfies that.
cnb = ComplementNB()

In [None]:
# Train the classifier on the training split.
cnb.fit(X_train,y_train)

In [None]:
# Predict the label for every row of the held-out split.
pred = cnb.predict(X_test)

In [None]:
# Report evaluation metrics on the held-out split. Each label is paired with
# the metric function of the same name, so "Recall" shows recall_score and
# "Precision" shows precision_score.
print("Accuracy:", metrics.accuracy_score(y_test, pred))
print("Recall:", metrics.recall_score(y_test, pred))
print("Precision:", metrics.precision_score(y_test, pred))

In [None]:
import pickle

# Serialise the fitted classifier `cnb` to a file in the current working
# directory, then load it back into `pickle_model`.
pkl_filename = "pickle_model.pkl"

with open(pkl_filename, 'wb') as out_file:
    pickle.dump(cnb, out_file)

with open(pkl_filename, 'rb') as in_file:
    pickle_model = pickle.load(in_file)