### 1. Import libraries

In [1]:

import pandas as pd
import numpy as np
import tqdm
import os
import seaborn as sns
import matplotlib.pyplot as ply
import cufflinks as cf
import chart_studio.plotly as py
import plotly.graph_objects as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Initialise plotly's offline notebook mode so figures can be shown without uploading them.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook - confirm against the plotly documentation.
init_notebook_mode(connected=True)
# Put cufflinks into offline mode as well.
cf.go_offline()

### 2. Load dataframes

In [2]:
# Per-municipality, per-year table with one column per species
# (Falciparum, Mista, Vivax, Malarie, Ovale), produced by sprint 03.
file_amazon = 'output_01_data_01_AM_mun_especie.csv'
path_input_file_amazon = os.path.join(
    os.getcwd(), os.pardir, 'sprint_03_data_analysis', 'output', file_amazon
)

# The file is ';'-separated; the 'Unnamed: 0' column it carries is discarded
# right after loading.
# NOTE(review): values such as 2.329 / 4.801 in the preview below look like
# thousands-separated counts that were parsed as decimals - confirm upstream.
df_amazon = pd.read_csv(path_input_file_amazon, sep=';').drop(columns='Unnamed: 0')
df_amazon.head()

Unnamed: 0,CD,Municipio,Falciparum,Mista,Vivax,Malarie,Ovale,Ano,Estado
0,110001,Alta Floresta D'Oeste,15.0,5.0,117.0,0,0,2003,RO
1,110002,Ariquemes,2.329,138.0,4.801,0,0,2003,RO
2,110003,Cabixi,178.0,0.0,54.0,0,0,2003,RO
3,110004,Cacoal,137.0,9.0,279.0,0,0,2003,RO
4,110005,Cerejeiras,57.0,6.0,104.0,0,0,2003,RO


In [3]:
# Combined COVID / malaria totals per municipality, collected in sprint 01.
file_malarie_covid_total = 'Covid_malaria_total_casos.csv'
path_input_file_malarie = os.path.join(
    os.getcwd(),
    os.pardir,
    'sprint_01_data_collection',
    'data_04',
    file_malarie_covid_total,
)

# Read with pandas' default separator; the 'Unnamed: 0' column is kept here.
# NOTE(review): the per-1000 rate columns show up as very large integers in the
# preview below, which suggests a lost decimal separator - confirm upstream.
df_malarie_covid_total = pd.read_csv(filepath_or_buffer=path_input_file_malarie)
df_malarie_covid_total.head()

Unnamed: 0,CD,Municipio.x,Populacao_2020,numeroLeitos,Ano,casos COVID/1000,Casos_COVID_Acumulados,obitos COVID/1000,Obitos_COVID_Acumulados,CFR,CasosAMazonia_Malaria,CasosAmazoniaAcumulados_Malaria,PF,PV,casos_Falciparum,caso_Vivax
0,110001,Alta Floresta D'Oeste,22728,49,2020,566701865540303,1288,703977472720873,16,12422360248447,0,0,0,351988736360436,0,8
1,110002,Ariquemes,109523,256,2020,718570528564776,7870,12234873040366,134,1702668360864,59,521,228262556723245,448307661404454,25,491
2,110003,Cabixi,5188,9,2020,360447185813416,187,115651503469545,6,32085561497326,0,0,0,0,0,0
3,110004,Cacoal,85893,360,2020,470934767676062,4045,66361635988963,57,14091470951792,7,122,23284784557531,138544468117309,2,119
4,110005,Cerejeiras,16204,40,2020,19809923475685,321,431992100715873,7,21806853582555,1,9,0,493705257960997,0,8


In [4]:
# Daily COVID records, one row per municipality and date, produced by sprint 03.
file_covid = 'output_02_data_02_covid_cities_confirmed_cases.csv'
path_input_file_covid = os.path.join(
    os.getcwd(), os.pardir, 'sprint_03_data_analysis', 'output', file_covid
)

# ';'-separated file; the 'Unnamed: 0' column is discarded right after loading.
df_covid = pd.read_csv(path_input_file_covid, sep=';').drop(columns='Unnamed: 0')
df_covid.head()

Unnamed: 0,regiao,estado,municipio,coduf,codmun,codRegiaoSaude,nomeRegiaoSaude,data,semanaEpi,populacaoTCU2019,casosAcumulado,casosNovos,obitosAcumulado,obitosNovos,Recuperadosnovos,emAcompanhamentoNovos,interior/metropolitana
0,Norte,RO,Alta Floresta D'Oeste,11,110001.0,11005.0,ZONA DA MATA,2020-05-02,18,22945.0,1.0,1,0,0,,,0.0
1,Norte,RO,Alta Floresta D'Oeste,11,110001.0,11005.0,ZONA DA MATA,2020-05-03,19,22945.0,1.0,0,0,0,,,0.0
2,Norte,RO,Alta Floresta D'Oeste,11,110001.0,11005.0,ZONA DA MATA,2020-05-04,19,22945.0,1.0,0,0,0,,,0.0
3,Norte,RO,Alta Floresta D'Oeste,11,110001.0,11005.0,ZONA DA MATA,2020-05-05,19,22945.0,1.0,0,0,0,,,0.0
4,Norte,RO,Alta Floresta D'Oeste,11,110001.0,11005.0,ZONA DA MATA,2020-05-06,19,22945.0,1.0,0,0,0,,,0.0


### 3.1 df covid: mean of daily new cases in Brazil

In [5]:
# Average of 'casosNovos' over every row of df_covid (one row per municipality and date).
df_covid.loc[:, 'casosNovos'].mean()

7.682014226895754

### 3.2 df covid: mean of daily new cases in Brazil by state

In [6]:
# Min / mean / std / max of daily new cases for each state (first five states shown).
# NOTE(review): the minima are negative - presumably retroactive corrections in the
# source data; confirm before interpreting them.
(
    df_covid[['estado', 'casosNovos']]
    .groupby(by='estado')
    .agg({'casosNovos': ['min', 'mean', 'std', 'max']})
    .head()
)

Unnamed: 0_level_0,casosNovos,casosNovos,casosNovos,casosNovos
Unnamed: 0_level_1,min,mean,std,max
estado,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AC,-15,9.520333,27.545478,422
AL,-114,4.811451,25.866409,924
AM,-824,15.559361,82.751944,3632
AP,-103,17.299858,56.790627,2504
BA,-69,6.253002,35.755661,3486


### 3.3 df covid: mean of new cases in Brazil by city

In [7]:
# Min / mean / std / max of daily new cases for each municipality code
# (first five municipalities shown).
(
    df_covid[['codmun', 'casosNovos']]
    .groupby(by='codmun')
    .agg({'casosNovos': ['min', 'mean', 'std', 'max']})
    .head()
)

Unnamed: 0_level_0,casosNovos,casosNovos,casosNovos,casosNovos
Unnamed: 0_level_1,min,mean,std,max
codmun,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
110001.0,-8,9.423377,10.742486,72
110002.0,-62,47.739558,55.738225,430
110003.0,-5,1.923295,4.814191,45
110004.0,-65,30.860465,40.579049,221
110005.0,-3,5.975275,11.538268,97


### 4.1 df covid: (sum of new cases) / population size, in Brazil

In [8]:
# Distinct (municipality code, population) pairs - a quick look before summing them.
df_covid.loc[:, ['codmun', 'populacaoTCU2019']].drop_duplicates().head()

Unnamed: 0,codmun,populacaoTCU2019
0,110001.0,22945.0
385,110002.0,107863.0
792,110003.0,5312.0
1144,110004.0,85359.0
1531,110005.0,16323.0


In [9]:
# National population: sum the population over the distinct
# (municipality code, population) pairs.
# NOTE(review): assumes each municipality has a single population value; a city
# with two different values would be counted twice - confirm.
n_pop_BRA = (
    df_covid[['codmun', 'populacaoTCU2019']]
    .drop_duplicates()
    .loc[:, 'populacaoTCU2019']
    .sum()
)
n_pop_BRA

210147125.0

In [10]:
# Share of the national population infected: total new cases divided by n_pop_BRA.
df_covid.loc[:, 'casosNovos'].sum() / n_pop_BRA

0.07584011915461608

### 4.2 df covid: (sum of new cases) / population size, in Brazil by state

In [11]:
# One row per municipality code (first occurrence kept), with its state and population.
(
    df_covid.loc[:, ['estado', 'codmun', 'populacaoTCU2019']]
    .drop_duplicates(subset=['codmun'])
    .head()
)

Unnamed: 0,estado,codmun,populacaoTCU2019
0,RO,110001.0,22945.0
385,RO,110002.0,107863.0
792,RO,110003.0,5312.0
1144,RO,110004.0,85359.0
1531,RO,110005.0,16323.0


In [12]:
# Population per state: keep one row per municipality code, then add up the
# municipal populations within each state.
# The aggregation is given as the string 'sum' rather than the builtin `sum`:
# passing the builtin to .agg is deprecated in recent pandas, and the string form
# matches the other aggregation cells. The resulting column label is still
# ('populacaoTCU2019', 'sum').
df_covid_sum_population_by_state = (
    df_covid[['estado', 'codmun', 'populacaoTCU2019']]
    .drop_duplicates(subset=['codmun'])
    .groupby(by=['estado'])
    .agg({'populacaoTCU2019': ['sum']})
)

df_covid_sum_population_by_state.head()

Unnamed: 0_level_0,populacaoTCU2019
Unnamed: 0_level_1,sum
estado,Unnamed: 1_level_2
AC,881935.0
AL,3337357.0
AM,4144597.0
AP,845731.0
BA,14873064.0


In [13]:
# Total number of new cases per state; the list-valued aggregation keeps the
# two-level column label ('casosNovos', 'sum').
df_covid_sum_newcases_by_state = (
    df_covid[['estado', 'casosNovos']]
    .groupby(by='estado')
    .agg({'casosNovos': ['sum']})
)

df_covid_sum_newcases_by_state.head()

Unnamed: 0_level_0,casosNovos
Unnamed: 0_level_1,sum
estado,Unnamed: 1_level_2
AC,81237
AL,187483
AM,381640
AP,109906
BA,966095


In [65]:
# Infected rate per state: total new cases divided by state population.
# Both operands are one-column frames labelled 'sum', so the division aligns on
# the state index and on that column label.
df_covid_rate_infected_by_state = (
    df_covid_sum_newcases_by_state['casosNovos'][['sum']]
    .div(df_covid_sum_population_by_state['populacaoTCU2019'][['sum']])
)

df_covid_rate_infected_by_state.head()

Unnamed: 0_level_0,sum
estado,Unnamed: 1_level_1
AC,0.092112
AL,0.056177
AM,0.092081
AP,0.129954
BA,0.064956


### 4.3 df covid: (sum of new cases) / population size, in Brazil by city

In [21]:
# Distinct (state, municipality code, population) combinations.
(
    df_covid.loc[:, ['estado', 'codmun', 'populacaoTCU2019']]
    .drop_duplicates()
    .head()
)

Unnamed: 0,estado,codmun,populacaoTCU2019
0,RO,110001.0,22945.0
385,RO,110002.0,107863.0
792,RO,110003.0,5312.0
1144,RO,110004.0,85359.0
1531,RO,110005.0,16323.0


In [67]:
# Population per municipality code, as a one-column frame indexed by 'codmun'.
# NOTE(review): distinct (codmun, population) pairs are summed, so a municipality
# listed with two different population values would be double counted - confirm
# that each code carries a single value.
df_covid_sum_population_by_city = (
    df_covid[['codmun', 'populacaoTCU2019']]
    .drop_duplicates()
    .groupby(by='codmun')[['populacaoTCU2019']]
    .sum()
)

df_covid_sum_population_by_city.head()

Unnamed: 0_level_0,populacaoTCU2019
codmun,Unnamed: 1_level_1
110001.0,22945.0
110002.0,107863.0
110003.0,5312.0
110004.0,85359.0
110005.0,16323.0


In [63]:
# Total number of new cases per municipality code; the list-valued aggregation
# keeps the two-level column label ('casosNovos', 'sum').
df_covid_sum_newcases_by_city = (
    df_covid[['codmun', 'casosNovos']]
    .groupby(by='codmun')
    .agg({'casosNovos': ['sum']})
)

df_covid_sum_newcases_by_city.head()

Unnamed: 0_level_0,casosNovos
Unnamed: 0_level_1,sum
codmun,Unnamed: 1_level_2
110001.0,3628
110002.0,19430
110003.0,677
110004.0,11943
110005.0,2175


In [96]:
# Infected rate per municipality: total new cases divided by population,
# aligned on the 'codmun' index.
# The quotient Series is turned into a frame with .to_frame(name=...) instead of
# pd.DataFrame(series, columns=[...]): the constructor form only yields values
# when the Series happens to be unnamed; for a named Series it would select a
# non-existent column and produce all-NaN output.
df_covid_rate_infected_by_city = (
    df_covid_sum_newcases_by_city['casosNovos']['sum']
    .div(df_covid_sum_population_by_city['populacaoTCU2019'])
    .to_frame(name='infected rate')
)

df_covid_rate_infected_by_city.head()

Unnamed: 0_level_0,infected rate
codmun,Unnamed: 1_level_1
110001.0,0.158117
110002.0,0.180136
110003.0,0.127447
110004.0,0.139915
110005.0,0.133248
