# Pipeline

## 1. Import libraries

In [1]:
import pandas as pd
import numpy as np
import tqdm
import os
import seaborn as sns
import matplotlib.pyplot as ply
import cufflinks as cf
import chart_studio.plotly as py
import plotly.graph_objects as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

%matplotlib inline
init_notebook_mode(connected=True)
cf.go_offline()

## 2. Load dataframes

In [2]:
file_amazon = 'output_01_data_01_AM_mun_especie.csv'
path_input_file_amazon = os.path.join(os.getcwd(), '..', 'sprint_03_data_analysis', 'output', file_amazon)
df_amazon = pd.read_csv(path_input_file_amazon, delimiter=';')
del df_amazon['Unnamed: 0']
df_amazon.head()

Unnamed: 0,CD,Municipio,Falciparum,Mista,Vivax,Malarie,Ovale,Ano,Estado
0,110001,Alta Floresta D'Oeste,15.0,5.0,117.0,0,0,2003,RO
1,110002,Ariquemes,2.329,138.0,4.801,0,0,2003,RO
2,110003,Cabixi,178.0,0.0,54.0,0,0,2003,RO
3,110004,Cacoal,137.0,9.0,279.0,0,0,2003,RO
4,110005,Cerejeiras,57.0,6.0,104.0,0,0,2003,RO


In [3]:
file_malarie_covid_total = 'Covid_malaria_total_casos.csv'
path_input_file_malarie = os.path.join(os.getcwd(), '..', 
                                       'sprint_01_data_collection', 
                                       'data_04', 
                                       file_malarie_covid_total)
df_malarie_covid_total = pd.read_csv(path_input_file_malarie)
df_malarie_covid_total.head()

Unnamed: 0.1,Unnamed: 0,CD,Municipio.x,Populacao_2020,numeroLeitos,Ano,casos COVID/1000,Casos_COVID_Acumulados,obitos COVID/1000,Obitos_COVID_Acumulados,CFR,CasosAMazonia_Malaria,CasosAmazoniaAcumulados_Malaria,PF,PV,casos_Falciparum,caso_Vivax
0,0,110001,Alta Floresta D'Oeste,22728,49,2020,56.670187,1288,0.703977,16,0.012422,0,0,0.0,0.351989,0,8
1,1,110002,Ariquemes,109523,256,2020,71.857053,7870,1.223487,134,0.017027,59,521,0.228263,4.483077,25,491
2,2,110003,Cabixi,5188,9,2020,36.044719,187,1.156515,6,0.032086,0,0,0.0,0.0,0,0
3,3,110004,Cacoal,85893,360,2020,47.093477,4045,0.663616,57,0.014091,7,122,0.023285,1.385445,2,119
4,4,110005,Cerejeiras,16204,40,2020,19.809923,321,0.431992,7,0.021807,1,9,0.0,0.493705,0,8


In [2]:
file_covid = 'output_02_data_02_covid_cities_confirmed_cases.csv'
path_input_file_covid = os.path.join(os.getcwd(), '..', 
                                     'sprint_03_data_analysis', 'output', 
                                     file_covid)
df_covid = pd.read_csv(path_input_file_covid, delimiter=';')
del df_covid['Unnamed: 0']
df_covid.head()

Unnamed: 0,regiao,estado,municipio,coduf,codmun,codRegiaoSaude,nomeRegiaoSaude,data,semanaEpi,populacaoTCU2019,casosAcumulado,casosNovos,obitosAcumulado,obitosNovos,Recuperadosnovos,emAcompanhamentoNovos,interior/metropolitana
0,Norte,RO,Alta Floresta D'Oeste,11,110001.0,11005.0,ZONA DA MATA,2020-05-02,18,22945.0,1.0,1,0,0,,,0.0
1,Norte,RO,Alta Floresta D'Oeste,11,110001.0,11005.0,ZONA DA MATA,2020-05-03,19,22945.0,1.0,0,0,0,,,0.0
2,Norte,RO,Alta Floresta D'Oeste,11,110001.0,11005.0,ZONA DA MATA,2020-05-04,19,22945.0,1.0,0,0,0,,,0.0
3,Norte,RO,Alta Floresta D'Oeste,11,110001.0,11005.0,ZONA DA MATA,2020-05-05,19,22945.0,1.0,0,0,0,,,0.0
4,Norte,RO,Alta Floresta D'Oeste,11,110001.0,11005.0,ZONA DA MATA,2020-05-06,19,22945.0,1.0,0,0,0,,,0.0


In [6]:
file_covid_deaths = 'output_03_data_02_covid_cities_confirmed_deaths.csv'
path_input_file_covid_deaths = os.path.join(os.getcwd(), '..', 
                                     'sprint_03_data_analysis', 'output', 
                                     file_covid_deaths)
df_covid_deaths = pd.read_csv(path_input_file_covid_deaths, delimiter=';')
del df_covid_deaths['Unnamed: 0']
df_covid_deaths.head()

Unnamed: 0,regiao,estado,municipio,coduf,codmun,codRegiaoSaude,nomeRegiaoSaude,data,semanaEpi,populacaoTCU2019,casosAcumulado,casosNovos,obitosAcumulado,obitosNovos,Recuperadosnovos,emAcompanhamentoNovos,interior/metropolitana
0,Norte,RO,Alta Floresta D'Oeste,11,110001.0,11005.0,ZONA DA MATA,2020-07-13,29,22945.0,156.0,1,1,1,,,0.0
1,Norte,RO,Alta Floresta D'Oeste,11,110001.0,11005.0,ZONA DA MATA,2020-07-14,29,22945.0,161.0,5,1,0,,,0.0
2,Norte,RO,Alta Floresta D'Oeste,11,110001.0,11005.0,ZONA DA MATA,2020-07-15,29,22945.0,175.0,14,1,0,,,0.0
3,Norte,RO,Alta Floresta D'Oeste,11,110001.0,11005.0,ZONA DA MATA,2020-07-16,29,22945.0,181.0,6,1,0,,,0.0
4,Norte,RO,Alta Floresta D'Oeste,11,110001.0,11005.0,ZONA DA MATA,2020-07-17,29,22945.0,183.0,2,1,0,,,0.0


### 3.1 df covid: mean of new daily new cases in Brazil

In [None]:
df_covid['casosNovos'].mean()

### 3.2 df covid: mean of new daily new cases in Brazil by state

In [None]:
df_covid_mean_newcases_by_state = df_covid[['estado', 'casosNovos']].groupby(['estado']).agg(
    {'casosNovos': ['min', 'mean', 'std', 'max']}).reset_index()

df_covid_mean_newcases_by_state.head()

df_covid_mean_newcases_by_state['casosNovos']['mean']
# df_covid_mean_newcases_by_state['estado']
# df_covid_mean_newcases_by_state.index.tolist()

### 3.3 df covid: mean of new cases in Brazil by city

In [None]:
df_covid[['codmun', 'casosNovos']].groupby('codmun').agg({'casosNovos': ['min', 'mean', 'std', 'max']}).head()

### 4.1 df covid: (sum of new cases) / length population, in Brazil

In [None]:
df_covid[['codmun', 'populacaoTCU2019']].drop_duplicates().head()

In [None]:
n_pop_BRA = df_covid[['codmun', 'populacaoTCU2019']].drop_duplicates()['populacaoTCU2019'].sum()
n_pop_BRA

In [None]:
df_covid['casosNovos'].sum() / n_pop_BRA

### 4.2 df covid: (sum of new cases) / length population, in Brazil by state

In [None]:
df_covid[['estado', 'codmun', 'populacaoTCU2019']].drop_duplicates(subset=['codmun']).head()

In [None]:
df_covid_sum_population_by_state = df_covid[['estado', 'codmun', 'populacaoTCU2019']].drop_duplicates(
    subset=['codmun']).groupby(by=['estado']
).agg({
    'populacaoTCU2019': [sum]
})

df_covid_sum_population_by_state.head()

In [None]:
df_covid_sum_newcases_by_state = df_covid[['estado', 'casosNovos']].groupby(['estado']).agg(
    {'casosNovos': ['sum']})

df_covid_sum_newcases_by_state.head()

In [None]:
df_covid_rate_infected_by_state = df_covid_sum_newcases_by_state['casosNovos'][['sum']] / \
df_covid_sum_population_by_state['populacaoTCU2019'][['sum']]

df_covid_rate_infected_by_state.head()

### 4.3 df covid: (sum of new cases) / length population, in Brazil by city

In [None]:
df_covid[['estado', 'codmun', 'populacaoTCU2019']].drop_duplicates().head()

In [None]:
df_covid_sum_population_by_city = df_covid[['codmun', 'populacaoTCU2019']].drop_duplicates().groupby(
    by='codmun').agg(
    {'populacaoTCU2019': 'sum'})

df_covid_sum_population_by_city.head()

In [None]:
df_covid_sum_newcases_by_city = df_covid[['codmun', 'casosNovos']].groupby(['codmun']).agg(
    {'casosNovos': ['sum']})

df_covid_sum_newcases_by_city.head()

In [None]:
df_covid_rate_infected_by_city = pd.DataFrame(df_covid_sum_newcases_by_city['casosNovos']['sum'] / \
df_covid_sum_population_by_city['populacaoTCU2019'], columns=['infected rate'])

df_covid_rate_infected_by_city.head()

## Since the first case ...

### 5.1 df covid: mean of new deaths in Brazil

In [None]:
df_covid['obitosNovos'].mean()

### 5.2 df covid: mean of new deaths in Brazil by state

In [None]:
df_covid[['estado', 'obitosNovos']].groupby(['estado']).agg({'obitosNovos': ['min', 'mean', 'std', 'max']}).head()

### 5.3 df covid: mean of new deaths in Brazil by city

In [None]:
df_covid[['codmun', 'obitosNovos']].groupby('codmun').agg({'obitosNovos': ['min', 'mean', 'std', 'max']}).head()

## Since the first death ...

### 6.1 df covid deaths: mean of new deaths in Brazil

In [None]:
df_covid_deaths['obitosNovos'].mean()

### 6.2 df covid deaths: mean of new deaths in Brazil by state

In [None]:
df_covid_deaths[['estado', 'obitosNovos']].groupby(['estado']).agg({'obitosNovos': ['min', 'mean', 'std', 'max']}).head()

### 6.3 df covid deaths: mean of new deaths in Brazil by city

In [None]:
df_covid_deaths[['codmun', 'obitosNovos']].groupby('codmun').agg({'obitosNovos': ['min', 'mean', 'std', 'max']}).head()

## Since the first case ...

### 7.1 df covid: (sum of new deaths) / length population, in Brazil

In [None]:
df_covid[['codmun', 'populacaoTCU2019']].drop_duplicates().head()

In [None]:
n_pop_BRA = df_covid[['codmun', 'populacaoTCU2019']].drop_duplicates()['populacaoTCU2019'].sum()
n_pop_BRA

In [None]:
df_covid['obitosNovos'].sum() / n_pop_BRA

### 7.2 df covid: (sum of new deaths) / length population, in Brazil by state

In [None]:
df_covid[['estado', 'codmun', 'populacaoTCU2019']].drop_duplicates(subset=['codmun']).head()

In [None]:
df_covid_sum_population_by_state = df_covid[['estado', 'codmun', 'populacaoTCU2019']].drop_duplicates(
    subset=['codmun']).groupby(by=['estado']
).agg({
    'populacaoTCU2019': [sum]
})

df_covid_sum_population_by_state.head()

In [None]:
df_covid_sum_newdeaths_by_state = df_covid[['estado', 'obitosNovos']].groupby(['estado']).agg(
    {'obitosNovos': ['sum']})

df_covid_sum_newdeaths_by_state.head()

In [None]:
df_covid_rate_deaths_by_state = df_covid_sum_newdeaths_by_state['obitosNovos'][['sum']] / \
df_covid_sum_population_by_state['populacaoTCU2019'][['sum']]

df_covid_rate_deaths_by_state.head()

### 7.3 df covid: (sum of new deaths) / length population, in Brazil by city

In [None]:
df_covid[['estado', 'codmun', 'populacaoTCU2019']].drop_duplicates().head()

In [None]:
df_covid_sum_population_by_city = df_covid[['codmun', 'populacaoTCU2019']].drop_duplicates().groupby(
    by='codmun').agg(
    {'populacaoTCU2019': 'sum'})

df_covid_sum_population_by_city.head()

In [None]:
df_covid_sum_newdeaths_by_city = df_covid[['codmun', 'obitosNovos']].groupby(['codmun']).agg(
    {'obitosNovos': ['sum']})

df_covid_sum_newdeaths_by_city.head()

In [None]:
df_covid_rate_death_by_city = pd.DataFrame(df_covid_sum_newdeaths_by_city['obitosNovos']['sum'] / \
df_covid_sum_population_by_city['populacaoTCU2019'], columns=['death rate'])

df_covid_rate_death_by_city.head()

## since the first death ...

### 8.1 df covid deaths: (sum of new deaths) / length population, in Brazil

In [None]:
df_covid_deaths[['codmun', 'populacaoTCU2019']].drop_duplicates().head()

In [None]:
n_pop_BRA_covid_deaths = df_covid_deaths[['codmun', 'populacaoTCU2019']].drop_duplicates()['populacaoTCU2019'].sum()
n_pop_BRA_covid_deaths

In [None]:
df_covid_deaths['obitosNovos'].sum() / n_pop_BRA_covid_deaths

### 8.2 df covid deaths: (sum of new deaths) / length population, in Brazil by state

In [None]:
df_covid_deaths[['estado', 'codmun', 'populacaoTCU2019']].drop_duplicates(subset=['codmun']).head()

In [None]:
df_covid_deaths_sum_population_by_state = df_covid_deaths[['estado', 'codmun', 'populacaoTCU2019']].drop_duplicates(
    subset=['codmun']).groupby(by=['estado']
).agg({
    'populacaoTCU2019': [sum]
})

df_covid_deaths_sum_population_by_state.head()

In [None]:
df_covid_deaths_sum_newdeaths_by_state = df_covid[['estado', 'obitosNovos']].groupby(['estado']).agg(
    {'obitosNovos': ['sum']})

df_covid_deaths_sum_newdeaths_by_state.head()

In [None]:
df_covid_deaths_rate_deaths_by_state = df_covid_deaths_sum_newdeaths_by_state['obitosNovos'][['sum']] / \
df_covid_deaths_sum_population_by_state['populacaoTCU2019'][['sum']]

df_covid_deaths_rate_deaths_by_state.head()

### 8.3 df covid deaths: (sum of new deaths) / length population, in Brazil by city

In [None]:
df_covid_deaths[['estado', 'codmun', 'populacaoTCU2019']].drop_duplicates().head()

In [None]:
df_covid_deaths_sum_population_by_city = df_covid_deaths[['codmun', 'populacaoTCU2019']].drop_duplicates().groupby(
    by='codmun').agg(
    {'populacaoTCU2019': 'sum'})

df_covid_deaths_sum_population_by_city.head()

In [None]:
df_covid_deaths_sum_newdeaths_by_city = df_covid_deaths[['codmun', 'obitosNovos']].groupby(['codmun']).agg(
    {'obitosNovos': ['sum']})

df_covid_deaths_sum_newdeaths_by_city.head()

In [None]:
df_covid_deaths_rate_death_by_city = pd.DataFrame(df_covid_deaths_sum_newdeaths_by_city['obitosNovos']['sum'] / \
df_covid_deaths_sum_population_by_city['populacaoTCU2019'], columns=['death rate'])

df_covid_deaths_rate_death_by_city.head()

## Lethality

### 9.1 df covid: (sum of new deaths) / (sum of new cases), in Brazil

In [None]:
df_covid['obitosNovos'].sum() / df_covid['casosNovos'].sum()

### 9.2 df covid: (sum of new deaths) / (sum of new cases), in Brazil by state

In [None]:
df_covid_sum_newdeaths_by_state = df_covid[['estado', 'obitosNovos']].groupby(['estado']).agg(
    {'obitosNovos': ['sum']})

df_covid_sum_newdeaths_by_state.head()

In [None]:
df_covid_sum_newcases_by_state = df_covid[['estado', 'casosNovos']].groupby(['estado']).agg(
    {'casosNovos': ['sum']})

df_covid_sum_newcases_by_state.head()

In [None]:
df_covid_lethality_by_state = df_covid_sum_newdeaths_by_state['obitosNovos'][['sum']] / \
df_covid_sum_newcases_by_state['casosNovos'][['sum']]

df_covid_lethality_by_state.head()

### 9.3 df covid: (sum of new deaths) / (sum of new cases), in Brazil by city

In [None]:
df_covid_sum_newdeaths_by_city = df_covid[['codmun', 'obitosNovos']].groupby(['codmun']).agg(
    {'obitosNovos': ['sum']})

df_covid_sum_newdeaths_by_city.head()

In [None]:
df_covid_sum_newcases_by_city = df_covid[['codmun', 'casosNovos']].groupby(['codmun']).agg(
    {'casosNovos': ['sum']})

df_covid_sum_newcases_by_city.head()

In [None]:
df_covid_lethality_by_city = df_covid_sum_newdeaths_by_city['obitosNovos'][['sum']] / \
df_covid_sum_newcases_by_city['casosNovos'][['sum']]

df_covid_lethality_by_city.head()

## Covid pressure in Brazil in tottaly, by state, and by city

### 10.1 df covid: df_covid['data'].unique(), in Brazil

In [None]:
df_covid['data'].unique().shape

### 10.2 df covid: df_covid['data'].unique(), in Brazil by state

In [None]:
df_covid[['estado', 'codmun', 'data']].groupby(by='estado').head(1)

In [None]:
"""
https://stackoverflow.com/questions/45759966/counting-unique-values-in-a-column-in-pandas-dataframe-like-in-qlik

 __________________________________________________
 nunique | Count distinct values
 ________|_________________________________________
 count   | Count only non-null values
 ________|_________________________________________
 size    | Count total values including null values
 ________|_________________________________________
 
"""

df_covid[['estado', 'data']].groupby(by='estado').agg(
    {'data': ['nunique', 'count', 'size']}).head()

In [None]:
df_covid[['estado', 'data']].groupby(by='estado').agg(
    {'data': ['nunique', 'count', 'size']}).sort_values(
    by=('data', 'count'), ascending=False).head()

In [None]:
"""
Proof of 9
"""
for estate in df_covid[['estado', 'codmun', 'data']]['estado'].unique():
#     print(df_covid[df_covid['estado'] == estate][['estado', 'codmun', 'data']].head())
    df_temp = df_covid[df_covid['estado'] == estate][['data']]
    unique_dates = np.unique(df_temp['data']).shape[0]
    print(estate, unique_dates)
#     break
    del df_temp

print(df_covid[['estado', 'codmun', 'data']][['data']].iloc[0, 0])
print(df_covid[['estado', 'codmun', 'data']][['data']].iloc[1, 0])

### 10.3 df covid: df_covid['data'].unique(), in Brazil by city

In [None]:
df_covid[['estado', 'codmun', 'data']].groupby(by='estado').head(1)

In [None]:
"""
https://stackoverflow.com/questions/45759966/counting-unique-values-in-a-column-in-pandas-dataframe-like-in-qlik

 __________________________________________________
 nunique | Count distinct values
 ________|_________________________________________
 count   | Count only non-null values
 ________|_________________________________________
 size    | Count total values including null values
 ________|_________________________________________
 
"""

df_covid[['codmun', 'data']].groupby(by='codmun').agg(
    {'data': ['nunique', 'count', 'size']}).head()

In [None]:
df_covid[['codmun', 'data']].groupby(by='codmun').agg(
    {'data': ['nunique', 'count', 'size']}).sort_values(
    by=('data', 'count'), ascending=False)

## Mean of deaths by miliion in Brazil in total, by state, and by city

### 11.1 df_covid: (mean of deaths / population) * 10⁹ in Brazil

In [None]:
df_covid['obitosNovos'].mean()

In [None]:
df_covid[['codmun', 'populacaoTCU2019']].drop_duplicates().head()

In [None]:
n_pop_BRA = df_covid[['codmun', 'populacaoTCU2019']].drop_duplicates()['populacaoTCU2019'].sum()
n_pop_BRA

In [None]:
(df_covid['obitosNovos'].mean() / n_pop_BRA ) * 10**9

### 11.2 df_covid: (mean of deaths / population) * 10⁹ in Brazil, by estate

In [None]:
df_covid_mean_deaths_by_state = df_covid[['estado', 'obitosNovos']].groupby(['estado']).agg(
    {'obitosNovos': ['mean']})

df_covid_mean_deaths_by_state.head()

In [None]:
df_covid_sum_population_by_state = df_covid[['estado', 'populacaoTCU2019']].drop_duplicates().groupby(by=['estado']
).agg({
    'populacaoTCU2019': [sum]
})

df_covid_sum_population_by_state.head()

In [None]:
pd.DataFrame((df_covid_mean_deaths_by_state['obitosNovos']['mean'] / \
df_covid_sum_population_by_state['populacaoTCU2019']['sum']) * 10**9, 
             columns=['deaths by million']).head()

### 11.3 df_covid: (mean of deaths / population) * 10⁹ in Brazil, by city

In [None]:
df_covid_mean_deaths_by_city = df_covid[['codmun', 'obitosNovos']].groupby('codmun').agg(
    {'obitosNovos': ['mean']})

df_covid_mean_deaths_by_city.head()

In [None]:
df_covid_sum_population_by_city = df_covid[['codmun', 'populacaoTCU2019']].drop_duplicates().groupby(
    by='codmun').agg(
    {'populacaoTCU2019': 'sum'})

df_covid_sum_population_by_city.head()

In [None]:
pd.DataFrame((df_covid_mean_deaths_by_city['obitosNovos']['mean'] / \
df_covid_sum_population_by_city['populacaoTCU2019']) * 10**9, 
             columns=['deaths by million']).head()

# <center> Relationship between Covid and Malaria, for all years</center>

## <center>Deaths by million, by state</center>

### 12.1 (df covid: deaths by million) vs (df amazon: num cases p. vivax)

In [7]:
df_covid_mean_deaths_by_state = df_covid[['estado', 'obitosNovos']].groupby(['estado']).agg(
    {'obitosNovos': ['mean']})

df_covid_mean_deaths_by_state.head()

Unnamed: 0_level_0,obitosNovos
Unnamed: 0_level_1,mean
estado,Unnamed: 1_level_2
AC,0.191257
AL,0.117307
AM,0.526419
AP,0.260349
BA,0.130032


In [8]:
df_covid_sum_population_by_state = df_covid[['estado', 'populacaoTCU2019']].drop_duplicates().groupby(by=['estado']
).agg({
    'populacaoTCU2019': [sum]
})

df_covid_sum_population_by_state.head()

Unnamed: 0_level_0,populacaoTCU2019
Unnamed: 0_level_1,sum
estado,Unnamed: 1_level_2
AC,881935.0
AL,3337357.0
AM,4144597.0
AP,845731.0
BA,14808908.0


In [9]:
df_covid_by_state_deaths_by_million = pd.DataFrame((df_covid_mean_deaths_by_state['obitosNovos']['mean'] / \
df_covid_sum_population_by_state['populacaoTCU2019']['sum']) * 10**9, 
             columns=['deaths by million'])

df_covid_by_state_deaths_by_million.head()

Unnamed: 0_level_0,deaths by million
estado,Unnamed: 1_level_1
AC,216.861187
AL,35.14979
AM,127.013262
AP,307.839539
BA,8.780629


In [None]:
df_amazon_by_state_sum_vivax = df_amazon.groupby(['Estado']).agg(
    {'Vivax': ['sum']}).drop('Total')

df_amazon_by_state_sum_vivax

In [None]:
amazon_states = df_amazon.Estado.drop_duplicates().tolist()
amazon_states.remove('Total')
amazon_states

In [None]:
bol_amazon_states = np.array(
    [state in amazon_states for state in df_covid_by_state_deaths_by_million.index]
).reshape(df_covid_by_state_deaths_by_million.shape[0], 1)

df_covid_by_state_deaths_by_million[bol_amazon_states]

In [None]:
df_deaths_by_million_vs_sum_vivax_by_state = pd.merge(
    left=df_covid_by_state_deaths_by_million[bol_amazon_states].reset_index(),
    right=df_amazon_by_state_sum_vivax.reset_index().rename(columns={'Estado': 'estado'}),
    on='estado').sort_values(by=('deaths by million'), ascending=False)

df_deaths_by_million_vs_sum_vivax_by_state

#### <center> Amazon </center>

In [None]:
df_deaths_by_million_vs_sum_vivax_by_state[['estado', 'deaths by million']].mean()

In [None]:
df_deaths_by_million_vs_sum_vivax_by_state[['estado', 'deaths by million']].std()

In [None]:
df_deaths_by_million_vs_sum_vivax_by_state[[('Vivax', 'sum')]].mean()

In [None]:
df_deaths_by_million_vs_sum_vivax_by_state[[('Vivax', 'sum')]].std()

#### <center>No Amazon</center>

In [None]:
df_covid_by_state_deaths_by_million[np.logical_not(bol_amazon_states)].mean()

In [None]:
df_covid_by_state_deaths_by_million[np.logical_not(bol_amazon_states)].std()

In [None]:
df_covid_by_state_deaths_by_million[np.logical_not(bol_amazon_states)]

### 12.2 (df covid: deaths by million) vs (df amazon: num cases p. falciparum)

In [None]:
df_covid_mean_deaths_by_state = df_covid[['estado', 'obitosNovos']].groupby(['estado']).agg(
    {'obitosNovos': ['mean']})

df_covid_mean_deaths_by_state.head()

# ok

In [None]:
df_covid_sum_population_by_state = df_covid[['estado', 'populacaoTCU2019']].drop_duplicates().groupby(by=['estado']
).agg({
    'populacaoTCU2019': [sum]
})

df_covid_sum_population_by_state.head()

# ok

In [None]:
df_covid_by_state_deaths_by_million = pd.DataFrame((df_covid_mean_deaths_by_state['obitosNovos']['mean'] / \
df_covid_sum_population_by_state['populacaoTCU2019']['sum']) * 10**9, 
             columns=['deaths by million'])

df_covid_by_state_deaths_by_million.head()

# ok

In [None]:
df_amazon_by_state_sum_falciparum = df_amazon.groupby(['Estado']).agg(
    {'Falciparum': ['sum']}).drop('Total')

df_amazon_by_state_sum_falciparum

# ok

In [None]:
amazon_states = df_amazon.Estado.drop_duplicates().tolist()
amazon_states.remove('Total')
amazon_states

In [None]:
bol_amazon_states = np.array(
    [state in amazon_states for state in df_covid_by_state_deaths_by_million.index]
).reshape(df_covid_by_state_deaths_by_million.shape[0], 1)

df_covid_by_state_deaths_by_million[bol_amazon_states]

In [None]:
df_deaths_by_million_vs_sum_falciparum_by_state = pd.merge(
    left=df_covid_by_state_deaths_by_million[bol_amazon_states].reset_index(),
    right=df_amazon_by_state_sum_falciparum.reset_index().rename(columns={'Estado': 'estado'}),
    on='estado').sort_values(by=('deaths by million'), ascending=False)

df_deaths_by_million_vs_sum_falciparum_by_state

#### <center> Amazon </center>

In [None]:
df_deaths_by_million_vs_sum_falciparum_by_state[['estado', 'deaths by million']].mean()

In [None]:
df_deaths_by_million_vs_sum_falciparum_by_state[['estado', 'deaths by million']].std()

In [None]:
df_deaths_by_million_vs_sum_falciparum_by_state[[('Falciparum', 'sum')]].mean()

In [None]:
df_deaths_by_million_vs_sum_falciparum_by_state[[('Falciparum', 'sum')]].std()

#### <center>No Amazon</center>

In [None]:
df_covid_by_state_deaths_by_million[np.logical_not(bol_amazon_states)].mean()

In [None]:
df_covid_by_state_deaths_by_million[np.logical_not(bol_amazon_states)].std()

In [None]:
df_covid_by_state_deaths_by_million[np.logical_not(bol_amazon_states)]

### 12.3 (df covid: deaths by million) vs (df amazon: num cases p. vivax + p. falciparum)

In [None]:
df_covid_mean_deaths_by_state = df_covid[['estado', 'obitosNovos']].groupby(['estado']).agg(
    {'obitosNovos': ['mean']})

df_covid_mean_deaths_by_state.head()

# ok

In [None]:
df_covid_sum_population_by_state = df_covid[['estado', 'populacaoTCU2019']].drop_duplicates().groupby(by=['estado']
).agg({
    'populacaoTCU2019': [sum]
})

df_covid_sum_population_by_state.head()

# ok

In [None]:
df_covid_by_state_deaths_by_million = pd.DataFrame((df_covid_mean_deaths_by_state['obitosNovos']['mean'] / \
df_covid_sum_population_by_state['populacaoTCU2019']['sum']) * 10**9, 
             columns=['deaths by million'])

df_covid_by_state_deaths_by_million.head()

# ok

In [None]:
df_amazon_by_state_sum_vivax_falciparum = pd.DataFrame(
    df_amazon.groupby(['Estado']).agg(
        {'Vivax': ['sum'], 'Falciparum': ['sum']}).drop('Total').sum(axis=1), 
    columns=['sum_vivax_falciparum'])

df_amazon_by_state_sum_vivax_falciparum

In [None]:
amazon_states = df_amazon.Estado.drop_duplicates().tolist()
amazon_states.remove('Total')
amazon_states

In [None]:
bol_amazon_states = np.array(
    [state in amazon_states for state in df_covid_by_state_deaths_by_million.index]
).reshape(df_covid_by_state_deaths_by_million.shape[0], 1)

df_covid_by_state_deaths_by_million[bol_amazon_states]

In [None]:
df_deaths_by_million_vs_sum_vivax_falciparum_by_state = pd.merge(
    left=df_covid_by_state_deaths_by_million[bol_amazon_states].reset_index(),
    right=df_amazon_by_state_sum_vivax_falciparum.reset_index().rename(columns={'Estado': 'estado'}),
    on='estado').sort_values(by=('deaths by million'), ascending=False)

df_deaths_by_million_vs_sum_vivax_falciparum_by_state

#### <center> Amazon </center>

In [None]:
df_deaths_by_million_vs_sum_falciparum_by_state[['estado', 'deaths by million']].mean()

In [None]:
df_deaths_by_million_vs_sum_falciparum_by_state[['estado', 'deaths by million']].std()

In [None]:
df_deaths_by_million_vs_sum_vivax_falciparum_by_state[['sum_vivax_falciparum']].mean()

In [None]:
df_deaths_by_million_vs_sum_vivax_falciparum_by_state[[('sum_vivax_falciparum')]].std()

#### <center>No Amazon</center>

In [None]:
df_covid_by_state_deaths_by_million[np.logical_not(bol_amazon_states)].mean()

In [None]:
df_covid_by_state_deaths_by_million[np.logical_not(bol_amazon_states)].std()

In [None]:
df_covid_by_state_deaths_by_million[np.logical_not(bol_amazon_states)]

### 12.4 (df covid: deaths by million) vs (df amazon: num cases all p. species)

In [None]:
df_covid_mean_deaths_by_state = df_covid[['estado', 'obitosNovos']].groupby(['estado']).agg(
    {'obitosNovos': ['mean']})

df_covid_mean_deaths_by_state.head()

In [None]:
df_covid_sum_population_by_state = df_covid[['estado', 'populacaoTCU2019']].drop_duplicates().groupby(by=['estado']
).agg({
    'populacaoTCU2019': [sum]
})

df_covid_sum_population_by_state.head()

In [None]:
df_covid_by_state_deaths_by_million = pd.DataFrame((df_covid_mean_deaths_by_state['obitosNovos']['mean'] / \
df_covid_sum_population_by_state['populacaoTCU2019']['sum']) * 10**9, 
             columns=['deaths by million'])

df_covid_by_state_deaths_by_million.head()

In [None]:
df_amazon_by_state_sum_all_plasmodium_species = pd.DataFrame(
    df_amazon.groupby(['Estado']).agg(
        {'Vivax': ['sum'], 
         'Falciparum': ['sum'], 
         'Malarie': ['sum'], 
         'Ovale': ['sum'], 
         'Mista': ['sum']}).drop('Total').sum(axis=1), 
    columns=['sum_all_plasmodium_species'])

df_amazon_by_state_sum_all_plasmodium_species

In [None]:
amazon_states = df_amazon.Estado.drop_duplicates().tolist()
amazon_states.remove('Total')
amazon_states

In [None]:
bol_amazon_states = np.array(
    [state in amazon_states for state in df_covid_by_state_deaths_by_million.index]
).reshape(df_covid_by_state_deaths_by_million.shape[0], 1)

df_covid_by_state_deaths_by_million[bol_amazon_states]

In [None]:
df_deaths_by_million_vs_sum_all_plasmodium_species_by_state = pd.merge(
    left=df_covid_by_state_deaths_by_million[bol_amazon_states].reset_index(),
    right=df_amazon_by_state_sum_all_plasmodium_species.reset_index().rename(columns={'Estado': 'estado'}),
    on='estado').sort_values(by=('deaths by million'), ascending=False)

df_deaths_by_million_vs_sum_all_plasmodium_species_by_state

#### <center> Amazon </center>

In [None]:
df_deaths_by_million_vs_sum_falciparum_by_state[['estado', 'deaths by million']].mean()

In [None]:
df_deaths_by_million_vs_sum_falciparum_by_state[['estado', 'deaths by million']].std()

In [None]:
df_deaths_by_million_vs_sum_all_plasmodium_species_by_state[['sum_all_plasmodium_species']].mean()

In [None]:
df_deaths_by_million_vs_sum_all_plasmodium_species_by_state[['sum_all_plasmodium_species']].std()

#### <center>No Amazon</center>

In [None]:
df_covid_by_state_deaths_by_million[np.logical_not(bol_amazon_states)].mean()

In [None]:
df_covid_by_state_deaths_by_million[np.logical_not(bol_amazon_states)].std()

In [None]:
df_covid_by_state_deaths_by_million[np.logical_not(bol_amazon_states)]

## <center>Deaths by million, by city, all years</center>

### 13.1 (df covid: deaths by million) vs (df amazon: num cases p. vivax)

In [17]:
df_covid_mean_deaths_by_city = df_covid[['codmun', 'obitosNovos']].groupby('codmun').agg(
    {'obitosNovos': ['mean']})

df_covid_mean_deaths_by_city.head()

Unnamed: 0_level_0,obitosNovos
Unnamed: 0_level_1,mean
codmun,Unnamed: 1_level_2
110001.0,0.158442
110002.0,1.034398
110003.0,0.051136
110004.0,0.609819
110005.0,0.151099


In [18]:
df_covid_sum_population_by_city = df_covid[['codmun', 'populacaoTCU2019']].drop_duplicates().groupby(
    by='codmun').agg(
    {'populacaoTCU2019': 'sum'})

df_covid_sum_population_by_city.head()

Unnamed: 0_level_0,populacaoTCU2019
codmun,Unnamed: 1_level_1
110001.0,22945.0
110002.0,107863.0
110003.0,5312.0
110004.0,85359.0
110005.0,16323.0


In [20]:
df_covid_by_city_deaths_by_million = pd.DataFrame((df_covid_mean_deaths_by_city['obitosNovos']['mean'] / \
df_covid_sum_population_by_city['populacaoTCU2019']) * 10**9, 
             columns=['deaths by million'])

df_covid_by_city_deaths_by_million.index = df_covid_by_city_deaths_by_million.index.map(str).map(lambda codmun: codmun.split('.')[0])

df_covid_by_city_deaths_by_million.head()

Unnamed: 0_level_0,deaths by million
codmun,Unnamed: 1_level_1
110001,6905.276027
110002,9589.924575
110003,9626.57448
110004,7144.168997
110005,9256.809477


In [21]:
df_amazon_by_city_sum_vivax = df_amazon.groupby(['CD']).agg(
    {'Vivax': ['sum']}).drop('Total geral')

df_amazon_by_city_sum_vivax.head()

Unnamed: 0_level_0,Vivax
Unnamed: 0_level_1,sum
CD,Unnamed: 1_level_2
110001,1519.0
110002,4256.198
110003,182.0
110004,4121.163
110005,647.0


In [22]:
amazon_cities = df_amazon.CD.drop_duplicates().tolist()
amazon_cities.remove('Total geral')

print(len(amazon_cities))
amazon_cities[:10]

706


['110001',
 '110002',
 '110003',
 '110004',
 '110005',
 '110006',
 '110007',
 '110008',
 '110009',
 '110010']

In [23]:
bol_amazon_cities = np.array(
    [city in amazon_cities for city in df_covid_by_city_deaths_by_million.index]
).reshape(df_covid_by_city_deaths_by_million.shape[0], 1)

df_covid_by_city_deaths_by_million[bol_amazon_cities]

Unnamed: 0_level_0,deaths by million
codmun,Unnamed: 1_level_1
110001,6905.276027
110002,9589.924575
110003,9626.574480
110004,7144.168997
110005,9256.809477
...,...
510860,2484.542265
510880,7990.951784
510885,4383.104360
510890,2925.691341


In [24]:
df_deaths_by_million_vs_sum_vivax_by_city = pd.merge(
    left=df_covid_by_city_deaths_by_million[bol_amazon_cities].reset_index(),
    right=df_amazon_by_city_sum_vivax.reset_index().rename(columns={'CD': 'codmun'}),
    on='codmun').sort_values(by=('deaths by million'), ascending=False)

df_deaths_by_million_vs_sum_vivax_by_city


merging between different levels can give an unintended result (1 levels on the left,2 on the right)


dropping on a non-lexsorted multi-index without a level parameter may impact performance.



Unnamed: 0,codmun,deaths by million,"(Vivax, sum)"
42,110146,19103.948404,595.000
635,510455,17897.774139,70.000
676,510719,12561.946097,9.000
9,110010,11696.012466,6295.477
376,172065,11291.496865,2.000
...,...,...,...
553,211085,220.156179,16.000
528,210920,198.809924,283.000
538,210975,0.000000,19.000
684,510774,0.000000,3.000


#### <center> Amazon </center>

In [25]:
df_deaths_by_million_vs_sum_vivax_by_city[['codmun', 'deaths by million']].mean()

codmun                       inf
deaths by million    3766.204675
dtype: float64

In [26]:
df_deaths_by_million_vs_sum_vivax_by_city[['codmun', 'deaths by million']].std()

deaths by million    2523.062031
dtype: float64

In [42]:
df_deaths_by_million_vs_sum_vivax_by_city[[('Vivax', 'sum')]].mean()

(Vivax, sum)    1120.861579
dtype: float64

In [43]:
df_deaths_by_million_vs_sum_vivax_by_city[[('Vivax', 'sum')]].std()

(Vivax, sum)    1748.950076
dtype: float64

#### <center>No Amazon</center>

In [28]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)].mean()

deaths by million    4179.158202
dtype: float64

In [30]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)].std()

deaths by million    2416.82618
dtype: float64

In [31]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)]

Unnamed: 0_level_0,deaths by million
codmun,Unnamed: 1_level_1
150160,1894.162412
170025,10203.873390
170040,1692.459037
170230,2565.927223
170270,2112.458860
...,...
522200,3518.754080
522205,6962.091412
522220,1917.731247
522230,2642.950590


### 13.2 (df covid: deaths by million) vs (df amazon: num cases p. falciparum)

In [32]:
df_covid_mean_deaths_by_city = df_covid[['codmun', 'obitosNovos']].groupby('codmun').agg(
    {'obitosNovos': ['mean']})

df_covid_mean_deaths_by_city.head()

Unnamed: 0_level_0,obitosNovos
Unnamed: 0_level_1,mean
codmun,Unnamed: 1_level_2
110001.0,0.158442
110002.0,1.034398
110003.0,0.051136
110004.0,0.609819
110005.0,0.151099


In [None]:
df_covid_sum_population_by_city = df_covid[['codmun', 'populacaoTCU2019']].drop_duplicates().groupby(
    by='codmun').agg(
    {'populacaoTCU2019': 'sum'})

df_covid_sum_population_by_city.head()

In [33]:
df_covid_by_city_deaths_by_million = pd.DataFrame((df_covid_mean_deaths_by_city['obitosNovos']['mean'] / \
df_covid_sum_population_by_city['populacaoTCU2019']) * 10**9, 
             columns=['deaths by million'])

df_covid_by_city_deaths_by_million.index = df_covid_by_city_deaths_by_million.index.map(str).map(lambda codmun: codmun.split('.')[0])

df_covid_by_city_deaths_by_million.head()

Unnamed: 0_level_0,deaths by million
codmun,Unnamed: 1_level_1
110001,6905.276027
110002,9589.924575
110003,9626.57448
110004,7144.168997
110005,9256.809477


In [34]:
df_amazon_by_city_sum_falciparum = df_amazon.groupby(['CD']).agg(
    {'Falciparum': ['sum']}).drop('Total geral')

df_amazon_by_city_sum_falciparum.head()

Unnamed: 0_level_0,Falciparum
Unnamed: 0_level_1,sum
CD,Unnamed: 1_level_2
110001,170.0
110002,2973.982
110003,202.0
110004,1437.0
110005,218.0


In [35]:
amazon_cities = df_amazon.CD.drop_duplicates().tolist()
amazon_cities.remove('Total geral')

print(len(amazon_cities))
amazon_cities[:10]

706


['110001',
 '110002',
 '110003',
 '110004',
 '110005',
 '110006',
 '110007',
 '110008',
 '110009',
 '110010']

In [36]:
bol_amazon_cities = np.array(
    [city in amazon_cities for city in df_covid_by_city_deaths_by_million.index]
).reshape(df_covid_by_city_deaths_by_million.shape[0], 1)

df_covid_by_city_deaths_by_million[bol_amazon_cities]

Unnamed: 0_level_0,deaths by million
codmun,Unnamed: 1_level_1
110001,6905.276027
110002,9589.924575
110003,9626.574480
110004,7144.168997
110005,9256.809477
...,...
510860,2484.542265
510880,7990.951784
510885,4383.104360
510890,2925.691341


In [37]:
df_deaths_by_million_vs_sum_falciparum_by_city = pd.merge(
    left=df_covid_by_city_deaths_by_million[bol_amazon_cities].reset_index(),
    right=df_amazon_by_city_sum_falciparum.reset_index().rename(columns={'CD': 'codmun'}),
    on='codmun').sort_values(by=('deaths by million'), ascending=False)

df_deaths_by_million_vs_sum_falciparum_by_city


merging between different levels can give an unintended result (1 levels on the left,2 on the right)


dropping on a non-lexsorted multi-index without a level parameter may impact performance.



Unnamed: 0,codmun,deaths by million,"(Falciparum, sum)"
42,110146,19103.948404,186.0
635,510455,17897.774139,3.0
676,510719,12561.946097,0.0
9,110010,11696.012466,2848.0
376,172065,11291.496865,0.0
...,...,...,...
553,211085,220.156179,4.0
528,210920,198.809924,0.0
538,210975,0.000000,6.0
684,510774,0.000000,1.0


#### <center> Amazon </center>

In [44]:
df_deaths_by_million_vs_sum_falciparum_by_city[['codmun', 'deaths by million']].mean()

codmun                       inf
deaths by million    3766.204675
dtype: float64

In [39]:
df_deaths_by_million_vs_sum_falciparum_by_city[['codmun', 'deaths by million']].std()

deaths by million    2523.062031
dtype: float64

In [40]:
df_deaths_by_million_vs_sum_falciparum_by_city[[('Falciparum', 'sum')]].mean()

(Falciparum, sum)    568.159693
dtype: float64

In [41]:
df_deaths_by_million_vs_sum_falciparum_by_city[[('Falciparum', 'sum')]].std()

(Falciparum, sum)    1215.38904
dtype: float64

#### <center>No Amazon</center>

In [45]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)].mean()

deaths by million    4179.158202
dtype: float64

In [46]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)].std()

deaths by million    2416.82618
dtype: float64

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)]

### 13.3 (df covid: deaths by million) vs (df amazon: num cases p. vivax + p. falciparum)

In [47]:
df_covid_mean_deaths_by_city = df_covid[['codmun', 'obitosNovos']].groupby('codmun').agg(
    {'obitosNovos': ['mean']})

df_covid_mean_deaths_by_city.head()

Unnamed: 0_level_0,obitosNovos
Unnamed: 0_level_1,mean
codmun,Unnamed: 1_level_2
110001.0,0.158442
110002.0,1.034398
110003.0,0.051136
110004.0,0.609819
110005.0,0.151099


In [48]:
df_covid_sum_population_by_city = df_covid[['codmun', 'populacaoTCU2019']].drop_duplicates().groupby(
    by='codmun').agg(
    {'populacaoTCU2019': 'sum'})

df_covid_sum_population_by_city.head()

Unnamed: 0_level_0,populacaoTCU2019
codmun,Unnamed: 1_level_1
110001.0,22945.0
110002.0,107863.0
110003.0,5312.0
110004.0,85359.0
110005.0,16323.0


In [49]:
df_covid_by_city_deaths_by_million = pd.DataFrame((df_covid_mean_deaths_by_city['obitosNovos']['mean'] / \
df_covid_sum_population_by_city['populacaoTCU2019']) * 10**9, 
             columns=['deaths by million'])

df_covid_by_city_deaths_by_million.index = df_covid_by_city_deaths_by_million.index.map(str).map(lambda codmun: codmun.split('.')[0])

df_covid_by_city_deaths_by_million.head()

Unnamed: 0_level_0,deaths by million
codmun,Unnamed: 1_level_1
110001,6905.276027
110002,9589.924575
110003,9626.57448
110004,7144.168997
110005,9256.809477


In [50]:
df_amazon_by_city_sum_vivax_falciparum = pd.DataFrame(
    df_amazon.groupby(['CD']).agg(
        {'Vivax': ['sum'], 
         'Falciparum': ['sum']}).drop('Total geral').sum(axis=1), 
    columns=['sum_vivax_falciparum'])

df_amazon_by_city_sum_vivax_falciparum.head()

Unnamed: 0_level_0,sum_vivax_falciparum
CD,Unnamed: 1_level_1
110001,1689.0
110002,7230.18
110003,384.0
110004,5558.163
110005,865.0


In [51]:
amazon_cities = df_amazon.CD.drop_duplicates().tolist()
amazon_cities.remove('Total geral')

print(len(amazon_cities))
amazon_cities[:10]

706


['110001',
 '110002',
 '110003',
 '110004',
 '110005',
 '110006',
 '110007',
 '110008',
 '110009',
 '110010']

In [52]:
bol_amazon_cities = np.array(
    [city in amazon_cities for city in df_covid_by_city_deaths_by_million.index]
).reshape(df_covid_by_city_deaths_by_million.shape[0], 1)

df_covid_by_city_deaths_by_million[bol_amazon_cities]

Unnamed: 0_level_0,deaths by million
codmun,Unnamed: 1_level_1
110001,6905.276027
110002,9589.924575
110003,9626.574480
110004,7144.168997
110005,9256.809477
...,...
510860,2484.542265
510880,7990.951784
510885,4383.104360
510890,2925.691341


In [53]:
df_deaths_by_million_vs_sum_vivax_falciparum_by_city = pd.merge(
    left=df_covid_by_city_deaths_by_million[bol_amazon_cities].reset_index(),
    right=df_amazon_by_city_sum_vivax_falciparum.reset_index().rename(columns={'CD': 'codmun'}),
    on='codmun').sort_values(by=('deaths by million'), ascending=False)

df_deaths_by_million_vs_sum_vivax_falciparum_by_city

Unnamed: 0,codmun,deaths by million,sum_vivax_falciparum
42,110146,19103.948404,781.000
635,510455,17897.774139,73.000
676,510719,12561.946097,9.000
9,110010,11696.012466,9143.477
376,172065,11291.496865,2.000
...,...,...,...
553,211085,220.156179,20.000
528,210920,198.809924,283.000
538,210975,0.000000,25.000
684,510774,0.000000,4.000


#### <center> Amazon </center>

In [54]:
df_deaths_by_million_vs_sum_vivax_falciparum_by_city[['codmun', 'deaths by million']].mean()

codmun                       inf
deaths by million    3766.204675
dtype: float64

In [55]:
df_deaths_by_million_vs_sum_vivax_falciparum_by_city[['codmun', 'deaths by million']].std()

deaths by million    2523.062031
dtype: float64

In [56]:
df_deaths_by_million_vs_sum_vivax_falciparum_by_city[['sum_vivax_falciparum']].mean()

sum_vivax_falciparum    1689.021272
dtype: float64

In [57]:
df_deaths_by_million_vs_sum_vivax_falciparum_by_city[['sum_vivax_falciparum']].std()

sum_vivax_falciparum    2617.84599
dtype: float64

#### <center>No Amazon</center>

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)].mean()

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)].std()

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)]

### 13.4 (df covid: deaths by million) vs (df amazon: num cases all p. species)

In [None]:
df_covid_mean_deaths_by_city = df_covid[['codmun', 'obitosNovos']].groupby('codmun').agg(
    {'obitosNovos': ['mean']})

df_covid_mean_deaths_by_city.head()

In [None]:
df_covid_sum_population_by_city = df_covid[['codmun', 'populacaoTCU2019']].drop_duplicates().groupby(
    by='codmun').agg(
    {'populacaoTCU2019': 'sum'})

df_covid_sum_population_by_city.head()

In [None]:
df_covid_by_city_deaths_by_million = pd.DataFrame((df_covid_mean_deaths_by_city['obitosNovos']['mean'] / \
df_covid_sum_population_by_city['populacaoTCU2019']) * 10**9, 
             columns=['deaths by million'])

df_covid_by_city_deaths_by_million.index = df_covid_by_city_deaths_by_million.index.map(str).map(lambda codmun: codmun.split('.')[0])

df_covid_by_city_deaths_by_million.head()

In [None]:
df_amazon_by_city_sum_all_plasmodium_species = pd.DataFrame(
    df_amazon.groupby(['CD']).agg(
        {'Vivax': ['sum'], 
         'Falciparum': ['sum'], 
         'Malarie': ['sum'], 
         'Ovale': ['sum'], 
         'Mista': ['sum']}).drop('Total geral').sum(axis=1), 
    columns=['sum_all_plasmodium_species'])

df_amazon_by_city_sum_all_plasmodium_species.head()

In [None]:
amazon_cities = df_amazon.CD.drop_duplicates().tolist()
amazon_cities.remove('Total geral')

print(len(amazon_cities))
amazon_cities[:10]

In [83]:
bol_amazon_cities = np.array(
    [city in amazon_cities for city in df_covid_by_city_deaths_by_million.index]
).reshape(df_covid_by_city_deaths_by_million.shape[0], 1)

df_covid_by_city_deaths_by_million[bol_amazon_cities]

Unnamed: 0_level_0,deaths by million
codmun,Unnamed: 1_level_1
110001,6905.276027
110002,9589.924575
110003,9626.574480
110004,7144.168997
110005,9256.809477
...,...
510860,2484.542265
510880,7990.951784
510885,4383.104360
510890,2925.691341


In [None]:
df_deaths_by_million_vs_sum_all_plasmodium_species_by_city = pd.merge(
    left=df_covid_by_city_deaths_by_million[bol_amazon_cities].reset_index(),
    right=df_amazon_by_city_sum_all_plasmodium_species.reset_index().rename(columns={'CD': 'codmun'}),
    on='codmun').sort_values(by=('deaths by million'), ascending=False)

df_deaths_by_million_vs_sum_all_plasmodium_species_by_city

#### <center> Amazon </center>

In [None]:
df_deaths_by_million_vs_sum_all_plasmodium_species_by_city[['codmun', 'deaths by million']].mean()

In [None]:
df_deaths_by_million_vs_sum_all_plasmodium_species_by_city[['codmun', 'deaths by million']].std()

In [None]:
df_deaths_by_million_vs_sum_all_plasmodium_species_by_city[['sum_all_plasmodium_species']].mean()

In [None]:
df_deaths_by_million_vs_sum_all_plasmodium_species_by_city[['sum_all_plasmodium_species']].std()

#### <center>No Amazon</center>

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)].mean()

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)].std()

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)]

## <center> Lethality, by state, for all years</center>

### 14.1 (df covid: lethality) vs (df amazon: num cases p. vivax)

In [65]:
df_covid_sum_newdeaths_by_state = df_covid[['estado', 'obitosNovos']].groupby(['estado']).agg(
    {'obitosNovos': ['sum']})

# df_covid_sum_newdeaths_by_state.head()

In [66]:
df_covid_sum_newcases_by_state = df_covid[['estado', 'casosNovos']].groupby(['estado']).agg(
    {'casosNovos': ['sum']})

# df_covid_sum_newcases_by_state.head()

In [80]:
df_covid_lethality_by_state = pd.DataFrame(
    (df_covid_sum_newdeaths_by_state['obitosNovos'][['sum']] / \
     df_covid_sum_newcases_by_state['casosNovos'][['sum']])).rename(
    columns={'sum':'lethality'})

df_covid_lethality_by_state.head()
df_covid_lethality_by_state.sort_values(by='lethality', ascending=False)

Unnamed: 0_level_0,lethality
estado,Unnamed: 1_level_1
RJ,0.058807
AM,0.033833
SP,0.033832
PE,0.033408
PA,0.027975
GO,0.027845
MA,0.027704
MT,0.026843
CE,0.026226
RS,0.025884


In [82]:
path_output = os.path.join(os.getcwd(), 'output', 'data')
file_output = 'lethality_covid_by_state.csv'
df_covid_lethality_by_state.sort_values(by='lethality', ascending=False).to_csv(
    os.path.join(path_output, file_output))

In [68]:
df_amazon_by_state_sum_vivax = df_amazon.groupby(['Estado']).agg(
    {'Vivax': ['sum']}).drop('Total')

df_amazon_by_state_sum_vivax

Unnamed: 0_level_0,Vivax
Unnamed: 0_level_1,sum
Estado,Unnamed: 1_level_2
AC,42958.706
AM,198104.113
AP,47844.513
MA,62967.199
MT,31273.361
PA,233566.025
RO,96408.416
RR,75039.942
TO,3166.0


In [69]:
amazon_states = df_amazon.Estado.drop_duplicates().tolist()
amazon_states.remove('Total')
amazon_states

['RO', 'AC', 'AM', 'RR', 'PA', 'AP', 'TO', 'MA', 'MT']

In [70]:
bol_amazon_states = np.array(
    [state in amazon_states for state in df_covid_lethality_by_state.index]
).reshape(df_covid_lethality_by_state.shape[0], 1)

df_covid_lethality_by_state[bol_amazon_states]

Unnamed: 0_level_0,lethality
estado,Unnamed: 1_level_1
AC,0.020089
AM,0.033833
AP,0.015049
MA,0.027704
MT,0.026843
PA,0.027975
RO,0.024874
RR,0.015984
TO,0.016125


In [71]:
df_lethality_vs_sum_vivax_by_state = pd.merge(
    left=df_covid_lethality_by_state[bol_amazon_states].reset_index(),
    right=df_amazon_by_state_sum_vivax.reset_index().rename(columns={'Estado': 'estado'}),
    on='estado')

df_lethality_vs_sum_vivax_by_state


merging between different levels can give an unintended result (1 levels on the left,2 on the right)


dropping on a non-lexsorted multi-index without a level parameter may impact performance.



Unnamed: 0,estado,lethality,"(Vivax, sum)"
0,AC,0.020089,42958.706
1,AM,0.033833,198104.113
2,AP,0.015049,47844.513
3,MA,0.027704,62967.199
4,MT,0.026843,31273.361
5,PA,0.027975,233566.025
6,RO,0.024874,96408.416
7,RR,0.015984,75039.942
8,TO,0.016125,3166.0


#### <center> Amazon </center>

In [74]:
df_lethality_vs_sum_vivax_by_state[['estado', 'lethality']].mean()

lethality    0.023164
dtype: float64

In [75]:
df_lethality_vs_sum_vivax_by_state[['estado', 'lethality']].std()

lethality    0.006618
dtype: float64

In [None]:
df_lethality_vs_sum_vivax_by_state[[('Vivax', 'sum')]].mean()

In [None]:
df_lethality_vs_sum_vivax_by_state[[('Vivax', 'sum')]].std()

#### <center>No Amazon</center>

In [76]:
df_covid_lethality_by_state[np.logical_not(bol_amazon_states)].mean()

lethality    0.026302
dtype: float64

In [77]:
df_covid_lethality_by_state[np.logical_not(bol_amazon_states)].std()

lethality    0.00917
dtype: float64

In [None]:
df_covid_lethality_by_state[np.logical_not(bol_amazon_states)]

### 14.2 (df covid: lethality) vs (df amazon: num cases p. falciparum)

In [None]:
df_covid_sum_newdeaths_by_state = df_covid[['estado', 'obitosNovos']].groupby(['estado']).agg(
    {'obitosNovos': ['sum']})

# df_covid_sum_newdeaths_by_state.head()

In [None]:
df_covid_sum_newcases_by_state = df_covid[['estado', 'casosNovos']].groupby(['estado']).agg(
    {'casosNovos': ['sum']})

# df_covid_sum_newcases_by_state.head()

In [None]:
df_covid_lethality_by_state = pd.DataFrame(
    (df_covid_sum_newdeaths_by_state['obitosNovos'][['sum']] / \
     df_covid_sum_newcases_by_state['casosNovos'][['sum']])).rename(
    columns={'sum':'lethality'})

df_covid_lethality_by_state.head()

In [None]:
df_amazon_by_state_sum_falciparum = df_amazon.groupby(['Estado']).agg(
    {'Falciparum': ['sum']}).drop('Total')

df_amazon_by_state_sum_falciparum

# ok

In [None]:
amazon_states = df_amazon.Estado.drop_duplicates().tolist()
amazon_states.remove('Total')
amazon_states

In [None]:
bol_amazon_states = np.array(
    [state in amazon_states for state in df_covid_lethality_by_state.index]
).reshape(df_covid_lethality_by_state.shape[0], 1)

df_covid_lethality_by_state[bol_amazon_states]

In [None]:
df_lethality_vs_sum_falciparum_by_state = pd.merge(
    left=df_covid_lethality_by_state[bol_amazon_states].reset_index(),
    right=df_amazon_by_state_sum_falciparum.reset_index().rename(columns={'Estado': 'estado'}),
    on='estado').sort_values(by=('lethality'), ascending=False)

df_lethality_vs_sum_falciparum_by_state

#### <center> Amazon </center>

In [None]:
df_lethality_vs_sum_falciparum_by_state[['estado', 'lethality']].mean()

In [None]:
df_lethality_vs_sum_falciparum_by_state[['estado', 'lethality']].std()

In [None]:
df_lethality_vs_sum_falciparum_by_state[[('Falciparum', 'sum')]].mean()

In [None]:
df_lethality_vs_sum_falciparum_by_state[[('Falciparum', 'sum')]].std()

#### <center>No Amazon</center>

In [None]:
df_covid_lethality_by_state[np.logical_not(bol_amazon_states)].mean()

In [None]:
df_covid_lethality_by_state[np.logical_not(bol_amazon_states)].std()

In [None]:
df_covid_lethality_by_state[np.logical_not(bol_amazon_states)]

### 14.3 (df covid: lethality) vs (df amazon: num cases p. vivax + p. falciparum)

In [None]:
df_covid_sum_newdeaths_by_state = df_covid[['estado', 'obitosNovos']].groupby(['estado']).agg(
    {'obitosNovos': ['sum']})

# df_covid_sum_newdeaths_by_state.head()

In [None]:
df_covid_sum_newcases_by_state = df_covid[['estado', 'casosNovos']].groupby(['estado']).agg(
    {'casosNovos': ['sum']})

# df_covid_sum_newcases_by_state.head()

In [None]:
df_covid_lethality_by_state = pd.DataFrame(
    (df_covid_sum_newdeaths_by_state['obitosNovos'][['sum']] / \
     df_covid_sum_newcases_by_state['casosNovos'][['sum']])).rename(
    columns={'sum':'lethality'})

df_covid_lethality_by_state.head()

In [None]:
df_amazon_by_state_sum_vivax_falciparum = pd.DataFrame(
    df_amazon.groupby(['Estado']).agg(
        {'Vivax': ['sum'], 'Falciparum': ['sum']}).drop('Total').sum(axis=1), 
    columns=['sum_vivax_falciparum'])

df_amazon_by_state_sum_vivax_falciparum

In [None]:
amazon_states = df_amazon.Estado.drop_duplicates().tolist()
amazon_states.remove('Total')
amazon_states

In [None]:
bol_amazon_states = np.array(
    [state in amazon_states for state in df_covid_lethality_by_state.index]
).reshape(df_covid_lethality_by_state.shape[0], 1)

df_covid_lethality_by_state[bol_amazon_states]

In [None]:
df_lethality_vs_sum_vivax_falciparum_by_state = pd.merge(
    left=df_covid_lethality_by_state[bol_amazon_states].reset_index(),
    right=df_amazon_by_state_sum_vivax_falciparum.reset_index().rename(columns={'Estado': 'estado'}),
    on='estado').sort_values(by=('lethality'), ascending=False)

df_lethality_vs_sum_vivax_falciparum_by_state

#### <center> Amazon </center>

In [None]:
df_lethality_vs_sum_vivax_falciparum_by_state[['estado', 'lethality']].mean()

In [None]:
df_lethality_vs_sum_vivax_falciparum_by_state[['estado', 'lethality']].std()

In [None]:
df_lethality_vs_sum_vivax_falciparum_by_state[['sum_vivax_falciparum']].mean()

In [None]:
df_lethality_vs_sum_vivax_falciparum_by_state[[('sum_vivax_falciparum')]].std()

#### <center>No Amazon</center>

In [None]:
df_covid_lethality_by_state[np.logical_not(bol_amazon_states)].mean()

In [None]:
df_covid_lethality_by_state[np.logical_not(bol_amazon_states)].std()

In [None]:
df_covid_lethality_by_state[np.logical_not(bol_amazon_states)]

### 14.4 (df covid: lethality) vs (df amazon: num cases all p. species)

In [None]:
df_covid_mean_deaths_by_state = df_covid[['estado', 'obitosNovos']].groupby(['estado']).agg(
    {'obitosNovos': ['mean']})

df_covid_mean_deaths_by_state.head()

In [None]:
df_covid_sum_population_by_state = df_covid[['estado', 'populacaoTCU2019']].drop_duplicates().groupby(by=['estado']
).agg({
    'populacaoTCU2019': [sum]
})

df_covid_sum_population_by_state.head()

In [None]:
df_covid_by_state_deaths_by_million = pd.DataFrame((df_covid_mean_deaths_by_state['obitosNovos']['mean'] / \
df_covid_sum_population_by_state['populacaoTCU2019']['sum']) * 10**9, 
             columns=['deaths by million'])

df_covid_by_state_deaths_by_million.head()

In [None]:
df_amazon_by_state_sum_all_plasmodium_species = pd.DataFrame(
    df_amazon.groupby(['Estado']).agg(
        {'Vivax': ['sum'], 
         'Falciparum': ['sum'], 
         'Malarie': ['sum'], 
         'Ovale': ['sum'], 
         'Mista': ['sum']}).drop('Total').sum(axis=1), 
    columns=['sum_all_plasmodium_species'])

df_amazon_by_state_sum_all_plasmodium_species

In [None]:
amazon_states = df_amazon.Estado.drop_duplicates().tolist()
amazon_states.remove('Total')
amazon_states

In [None]:
bol_amazon_states = np.array(
    [state in amazon_states for state in df_covid_by_state_deaths_by_million.index]
).reshape(df_covid_by_state_deaths_by_million.shape[0], 1)

df_covid_by_state_deaths_by_million[bol_amazon_states]

In [None]:
df_deaths_by_million_vs_sum_all_plasmodium_species_by_state = pd.merge(
    left=df_covid_by_state_deaths_by_million[bol_amazon_states].reset_index(),
    right=df_amazon_by_state_sum_all_plasmodium_species.reset_index().rename(columns={'Estado': 'estado'}),
    on='estado').sort_values(by=('deaths by million'), ascending=False)

df_deaths_by_million_vs_sum_all_plasmodium_species_by_state

#### <center> Amazon </center>

In [None]:
df_deaths_by_million_vs_sum_falciparum_by_state[['estado', 'deaths by million']].mean()

In [None]:
df_deaths_by_million_vs_sum_falciparum_by_state[['estado', 'deaths by million']].std()

In [None]:
df_deaths_by_million_vs_sum_all_plasmodium_species_by_state[['sum_all_plasmodium_species']].mean()

In [None]:
df_deaths_by_million_vs_sum_all_plasmodium_species_by_state[['sum_all_plasmodium_species']].std()

#### <center>No Amazon</center>

In [None]:
df_covid_by_state_deaths_by_million[np.logical_not(bol_amazon_states)].mean()

In [None]:
df_covid_by_state_deaths_by_million[np.logical_not(bol_amazon_states)].std()

In [None]:
df_covid_by_state_deaths_by_million[np.logical_not(bol_amazon_states)]

## <center>Lethality, by city, all years</center>

### 15.1 (df covid: lethality) vs (df amazon: num cases p. vivax)

In [58]:
df_covid_mean_deaths_by_city = df_covid[['codmun', 'obitosNovos']].groupby('codmun').agg(
    {'obitosNovos': ['mean']})

df_covid_mean_deaths_by_city.head()

Unnamed: 0_level_0,obitosNovos
Unnamed: 0_level_1,mean
codmun,Unnamed: 1_level_2
110001.0,0.158442
110002.0,1.034398
110003.0,0.051136
110004.0,0.609819
110005.0,0.151099


In [59]:
df_covid_sum_population_by_city = df_covid[['codmun', 'populacaoTCU2019']].drop_duplicates().groupby(
    by='codmun').agg(
    {'populacaoTCU2019': 'sum'})

df_covid_sum_population_by_city.head()

Unnamed: 0_level_0,populacaoTCU2019
codmun,Unnamed: 1_level_1
110001.0,22945.0
110002.0,107863.0
110003.0,5312.0
110004.0,85359.0
110005.0,16323.0


In [60]:
df_covid_by_city_deaths_by_million = pd.DataFrame((df_covid_mean_deaths_by_city['obitosNovos']['mean'] / \
df_covid_sum_population_by_city['populacaoTCU2019']) * 10**9, 
             columns=['deaths by million'])

df_covid_by_city_deaths_by_million.index = df_covid_by_city_deaths_by_million.index.map(str).map(lambda codmun: codmun.split('.')[0])

df_covid_by_city_deaths_by_million.head()

Unnamed: 0_level_0,deaths by million
codmun,Unnamed: 1_level_1
110001,6905.276027
110002,9589.924575
110003,9626.57448
110004,7144.168997
110005,9256.809477


In [61]:
df_amazon_by_city_sum_vivax = df_amazon.groupby(['CD']).agg(
    {'Vivax': ['sum']}).drop('Total geral')

df_amazon_by_city_sum_vivax.head()

Unnamed: 0_level_0,Vivax
Unnamed: 0_level_1,sum
CD,Unnamed: 1_level_2
110001,1519.0
110002,4256.198
110003,182.0
110004,4121.163
110005,647.0


In [62]:
amazon_cities = df_amazon.CD.drop_duplicates().tolist()
amazon_cities.remove('Total geral')

print(len(amazon_cities))
amazon_cities[:10]

706


['110001',
 '110002',
 '110003',
 '110004',
 '110005',
 '110006',
 '110007',
 '110008',
 '110009',
 '110010']

In [63]:
bol_amazon_cities = np.array(
    [city in amazon_cities for city in df_covid_by_city_deaths_by_million.index]
).reshape(df_covid_by_city_deaths_by_million.shape[0], 1)

df_covid_by_city_deaths_by_million[bol_amazon_cities]

Unnamed: 0_level_0,deaths by million
codmun,Unnamed: 1_level_1
110001,6905.276027
110002,9589.924575
110003,9626.574480
110004,7144.168997
110005,9256.809477
...,...
510860,2484.542265
510880,7990.951784
510885,4383.104360
510890,2925.691341


In [64]:
df_deaths_by_million_vs_sum_vivax_by_city = pd.merge(
    left=df_covid_by_city_deaths_by_million[bol_amazon_cities].reset_index(),
    right=df_amazon_by_city_sum_vivax.reset_index().rename(columns={'CD': 'codmun'}),
    on='codmun').sort_values(by=('deaths by million'), ascending=False)

df_deaths_by_million_vs_sum_vivax_by_city


merging between different levels can give an unintended result (1 levels on the left,2 on the right)


dropping on a non-lexsorted multi-index without a level parameter may impact performance.



Unnamed: 0,codmun,deaths by million,"(Vivax, sum)"
42,110146,19103.948404,595.000
635,510455,17897.774139,70.000
676,510719,12561.946097,9.000
9,110010,11696.012466,6295.477
376,172065,11291.496865,2.000
...,...,...,...
553,211085,220.156179,16.000
528,210920,198.809924,283.000
538,210975,0.000000,19.000
684,510774,0.000000,3.000


#### <center> Amazon </center>

In [None]:
df_deaths_by_million_vs_sum_vivax_by_city[['codmun', 'deaths by million']].mean()

In [None]:
df_deaths_by_million_vs_sum_vivax_by_city[['codmun', 'deaths by million']].std()

In [None]:
df_deaths_by_million_vs_sum_vivax_by_city[[('Vivax', 'sum')]].mean()

In [None]:
df_deaths_by_million_vs_sum_vivax_by_city[[('Vivax', 'sum')]].std()

#### <center>No Amazon</center>

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)].mean()

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)].std()

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)]

### 15.2 (df covid: lethality) vs (df amazon: num cases p. falciparum)

In [None]:
df_covid_mean_deaths_by_city = df_covid[['codmun', 'obitosNovos']].groupby('codmun').agg(
    {'obitosNovos': ['mean']})

df_covid_mean_deaths_by_city.head()

In [None]:
df_covid_sum_population_by_city = df_covid[['codmun', 'populacaoTCU2019']].drop_duplicates().groupby(
    by='codmun').agg(
    {'populacaoTCU2019': 'sum'})

df_covid_sum_population_by_city.head()

In [None]:
df_covid_by_city_deaths_by_million = pd.DataFrame((df_covid_mean_deaths_by_city['obitosNovos']['mean'] / \
df_covid_sum_population_by_city['populacaoTCU2019']) * 10**9, 
             columns=['deaths by million'])

df_covid_by_city_deaths_by_million.index = df_covid_by_city_deaths_by_million.index.map(str).map(lambda codmun: codmun.split('.')[0])

df_covid_by_city_deaths_by_million.head()

In [None]:
df_amazon_by_city_sum_falciparum = df_amazon.groupby(['CD']).agg(
    {'Falciparum': ['sum']}).drop('Total geral')

df_amazon_by_city_sum_falciparum.head()

In [None]:
amazon_cities = df_amazon.CD.drop_duplicates().tolist()
amazon_cities.remove('Total geral')

print(len(amazon_cities))
amazon_cities[:10]

In [None]:
bol_amazon_cities = np.array(
    [city in amazon_cities for city in df_covid_by_city_deaths_by_million.index]
).reshape(df_covid_by_city_deaths_by_million.shape[0], 1)

df_covid_by_city_deaths_by_million[bol_amazon_cities]

In [None]:
df_deaths_by_million_vs_sum_falciparum_by_city = pd.merge(
    left=df_covid_by_city_deaths_by_million[bol_amazon_cities].reset_index(),
    right=df_amazon_by_city_sum_falciparum.reset_index().rename(columns={'CD': 'codmun'}),
    on='codmun').sort_values(by=('deaths by million'), ascending=False)

df_deaths_by_million_vs_sum_falciparum_by_city

#### <center> Amazon </center>

In [None]:
df_deaths_by_million_vs_sum_falciparum_by_city[['codmun', 'deaths by million']].mean()

In [None]:
df_deaths_by_million_vs_sum_falciparum_by_city[['codmun', 'deaths by million']].std()

In [None]:
df_deaths_by_million_vs_sum_falciparum_by_city[[('Falciparum', 'sum')]].mean()

In [None]:
df_deaths_by_million_vs_sum_falciparum_by_city[[('Falciparum', 'sum')]].std()

#### <center>No Amazon</center>

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)].mean()

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)].std()

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)]

### 15.3 (df covid: lethality) vs (df amazon: num cases p. vivax + p. falciparum)

In [None]:
df_covid_mean_deaths_by_city = df_covid[['codmun', 'obitosNovos']].groupby('codmun').agg(
    {'obitosNovos': ['mean']})

df_covid_mean_deaths_by_city.head()

In [None]:
df_covid_sum_population_by_city = df_covid[['codmun', 'populacaoTCU2019']].drop_duplicates().groupby(
    by='codmun').agg(
    {'populacaoTCU2019': 'sum'})

df_covid_sum_population_by_city.head()

In [None]:
df_covid_by_city_deaths_by_million = pd.DataFrame((df_covid_mean_deaths_by_city['obitosNovos']['mean'] / \
df_covid_sum_population_by_city['populacaoTCU2019']) * 10**9, 
             columns=['deaths by million'])

df_covid_by_city_deaths_by_million.index = df_covid_by_city_deaths_by_million.index.map(str).map(lambda codmun: codmun.split('.')[0])

df_covid_by_city_deaths_by_million.head()

In [None]:
df_amazon_by_city_sum_vivax_falciparum = pd.DataFrame(
    df_amazon.groupby(['CD']).agg(
        {'Vivax': ['sum'], 
         'Falciparum': ['sum']}).drop('Total geral').sum(axis=1), 
    columns=['sum_vivax_falciparum'])

df_amazon_by_city_sum_vivax_falciparum.head()

In [None]:
amazon_cities = df_amazon.CD.drop_duplicates().tolist()
amazon_cities.remove('Total geral')

print(len(amazon_cities))
amazon_cities[:10]

In [None]:
bol_amazon_cities = np.array(
    [city in amazon_cities for city in df_covid_by_city_deaths_by_million.index]
).reshape(df_covid_by_city_deaths_by_million.shape[0], 1)

df_covid_by_city_deaths_by_million[bol_amazon_cities]

In [None]:
df_deaths_by_million_vs_sum_vivax_falciparum_by_city = pd.merge(
    left=df_covid_by_city_deaths_by_million[bol_amazon_cities].reset_index(),
    right=df_amazon_by_city_sum_vivax_falciparum.reset_index().rename(columns={'CD': 'codmun'}),
    on='codmun').sort_values(by=('deaths by million'), ascending=False)

df_deaths_by_million_vs_sum_vivax_falciparum_by_city

#### <center> Amazon </center>

In [None]:
df_deaths_by_million_vs_sum_vivax_falciparum_by_city[['codmun', 'deaths by million']].mean()

In [None]:
df_deaths_by_million_vs_sum_vivax_falciparum_by_city[['codmun', 'deaths by million']].std()

In [None]:
df_deaths_by_million_vs_sum_vivax_falciparum_by_city[['sum_vivax_falciparum']].mean()

In [None]:
df_deaths_by_million_vs_sum_vivax_falciparum_by_city[['sum_vivax_falciparum']].std()

#### <center>No Amazon</center>

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)].mean()

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)].std()

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)]

### 15.4 (df covid: lethality) vs (df amazon: num cases all p. species)

In [None]:
df_covid_mean_deaths_by_city = df_covid[['codmun', 'obitosNovos']].groupby('codmun').agg(
    {'obitosNovos': ['mean']})

df_covid_mean_deaths_by_city.head()

In [None]:
df_covid_sum_population_by_city = df_covid[['codmun', 'populacaoTCU2019']].drop_duplicates().groupby(
    by='codmun').agg(
    {'populacaoTCU2019': 'sum'})

df_covid_sum_population_by_city.head()

In [None]:
df_covid_by_city_deaths_by_million = pd.DataFrame((df_covid_mean_deaths_by_city['obitosNovos']['mean'] / \
df_covid_sum_population_by_city['populacaoTCU2019']) * 10**9, 
             columns=['deaths by million'])

df_covid_by_city_deaths_by_million.index = df_covid_by_city_deaths_by_million.index.map(str).map(lambda codmun: codmun.split('.')[0])

df_covid_by_city_deaths_by_million.head()

In [None]:
df_amazon_by_city_sum_all_plasmodium_species = pd.DataFrame(
    df_amazon.groupby(['CD']).agg(
        {'Vivax': ['sum'], 
         'Falciparum': ['sum'], 
         'Malarie': ['sum'], 
         'Ovale': ['sum'], 
         'Mista': ['sum']}).drop('Total geral').sum(axis=1), 
    columns=['sum_all_plasmodium_species'])

df_amazon_by_city_sum_all_plasmodium_species.head()

In [None]:
amazon_cities = df_amazon.CD.drop_duplicates().tolist()
amazon_cities.remove('Total geral')

print(len(amazon_cities))
amazon_cities[:10]

In [None]:
bol_amazon_cities = np.array(
    [city in amazon_cities for city in df_covid_by_city_deaths_by_million.index]
).reshape(df_covid_by_city_deaths_by_million.shape[0], 1)

df_covid_by_city_deaths_by_million[bol_amazon_cities]

In [None]:
df_deaths_by_million_vs_sum_all_plasmodium_species_by_city = pd.merge(
    left=df_covid_by_city_deaths_by_million[bol_amazon_cities].reset_index(),
    right=df_amazon_by_city_sum_all_plasmodium_species.reset_index().rename(columns={'CD': 'codmun'}),
    on='codmun').sort_values(by=('deaths by million'), ascending=False)

df_deaths_by_million_vs_sum_all_plasmodium_species_by_city

#### <center> Amazon </center>

In [None]:
df_deaths_by_million_vs_sum_all_plasmodium_species_by_city[['codmun', 'deaths by million']].mean()

In [None]:
df_deaths_by_million_vs_sum_all_plasmodium_species_by_city[['codmun', 'deaths by million']].std()

In [None]:
df_deaths_by_million_vs_sum_all_plasmodium_species_by_city[['sum_all_plasmodium_species']].mean()

In [None]:
df_deaths_by_million_vs_sum_all_plasmodium_species_by_city[['sum_all_plasmodium_species']].std()

#### <center>No Amazon</center>

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)].mean()

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)].std()

In [None]:
df_covid_by_city_deaths_by_million[np.logical_not(bol_amazon_cities)]

# <center>FINAL ANALYSIS</center>

## <center> 01: CFM | CFV</center>

### CFM = n° cases malária / population, by city
### CFV = n° deaths covid X ( n° beds / population), by city

In [None]:
df_amazon_by_city_sum_all_plasmodium_species_2020 = pd.DataFrame(
    df_amazon[df_amazon['Ano'].isin([2020])].groupby(['CD']).agg(
        {'Vivax': ['sum'], 
         'Falciparum': ['sum'], 
         'Malarie': ['sum'], 
         'Ovale': ['sum'], 
         'Mista': ['sum']}).drop('Total geral').sum(axis=1), 
    columns=['sum_all_plasmodium_species'])

df_amazon_by_city_sum_all_plasmodium_species_2020.head()

df_covid