In [None]:

# Add the parent directory to the module search path so modules located there can be imported
import sys
sys.path.append('../')

import glob
import pandas as pd
import numpy as np

from urllib.request import urlopen
import json

# Magic to reload notebook
%load_ext autoreload
%autoreload 2
pd.options.display.max_columns = 100

# Covid Saúde

Painel de casos de doença pelo coronavírus 2019 (COVID-19) no Brasil pelo Ministério da Saúde ([website](https://covid.saude.gov.br/))

In [None]:
# Both bulletin files are read with the same options: tab-separated, decoded as latin-1.
boletim_csv_options = dict(sep='\t', encoding='latin-1')

# Processed Ministry of Health panel data
df_covid_saude = pd.read_parquet("../data/processed/covid_saude.parquet")

# Raw bulletin tables: actions and references
df_acoes = pd.read_csv('../data/raw/boletim_covid/acoes.csv', **boletim_csv_options)
df_referencias = pd.read_csv('../data/raw/boletim_covid/referencias.csv', **boletim_csv_options)

In [None]:
# Map the control characters \x91-\x94 and \x85 to plain ASCII punctuation in a single pass.
# NOTE(review): these look like Windows-1252 curly quotes / ellipsis that were decoded
# as latin-1 when the CSV was read - confirm against the source file.
texto_punctuation_table = str.maketrans({
    '\x93': '"',
    '\x94': '"',
    '\x91': "'",
    '\x92': "'",
    '\x85': '...',
})
df_acoes['texto'] = df_acoes['texto'].str.translate(texto_punctuation_table)

In [None]:
# Build the two tables exported for the app:
#   * df_covid_saude_mun - one row per municipality and day (new deaths), joined with the
#     actions recorded for that day and tagged with a running week number;
#   * df_covid_saude_reg - one row per region and week, with the region's share of that
#     week's total new deaths.

# --- Municipality-level rows -------------------------------------------------------------
# Keep only rows that carry a municipality name. `.copy()` makes this an independent frame,
# so the column assignment below does not raise SettingWithCopyWarning.
df_covid_saude_mun = df_covid_saude.loc[
    df_covid_saude['municipio'].notna(),
    ['regiao', 'municipio', 'codmun', 'data', 'obitosNovos'],
].copy()
df_covid_saude_mun['data'] = pd.to_datetime(df_covid_saude_mun['data'])

# --- Actions: parse dates and format the text ----------------------------------------------
# NOTE(review): df_acoes is modified in place, so re-running this cell wraps text that
# already contains '<br>' a second time. Re-run the loading cells first.
df_acoes['data'] = pd.to_datetime(df_acoes['data'], format='%d/%m/%y')

# Wrap at 30 characters and turn the resulting line breaks into '<br>'. Using the `.str`
# accessor (instead of a Python lambda) keeps missing texts as NaN rather than raising.
df_acoes['texto'] = df_acoes['texto'].str.wrap(30).str.replace('\n', '<br>', regex=False)

# NOTE(review): if df_acoes holds more than one row for the same date, this left join
# duplicates the municipality rows of that date and the weekly sums below count their
# deaths more than once - confirm that dates are unique in df_acoes.
df_covid_saude_mun = df_covid_saude_mun.merge(df_acoes, how='left', on='data')

# --- Date -> week-number lookup --------------------------------------------------------------
# The week number is a running count of Mondays (weekday == 0); dates before the first
# Monday get week 0. The dates must be in chronological order for the cumulative sum to be
# meaningful, so sort explicitly instead of relying on the row order of the input.
df_depara_wn_day = df_covid_saude_mun[['data']].drop_duplicates().sort_values('data')
df_depara_wn_day['weekday'] = df_depara_wn_day['data'].dt.weekday
df_depara_wn_day['week_number'] = (df_depara_wn_day['weekday'] == 0).astype('int64').cumsum()
df_covid_saude_mun = df_covid_saude_mun.merge(df_depara_wn_day[['data', 'week_number']], how='left', on=['data'])

df_covid_saude_mun = df_covid_saude_mun.sort_values(by=['municipio', 'data'], ascending=[True, True])

# --- Region-level weekly table ---------------------------------------------------------------
# One row per (region, week); 'data' is taken from the first matching row in the
# municipality/date ordering established above.
df_covid_saude_reg = df_covid_saude_mun[['regiao', 'week_number', 'data']].drop_duplicates(
    subset=['regiao', 'week_number'], keep='first')

# Weekly new deaths: overall and per region.
df_weekly_deaths = df_covid_saude_mun.groupby(['week_number'])['obitosNovos'].sum().reset_index(name='new_deaths_week')
df_weekly_deaths_region = df_covid_saude_mun.groupby(['regiao', 'week_number'])['obitosNovos'].sum().reset_index(
    name='new_deaths_week_region')

df_covid_saude_reg = df_covid_saude_reg.merge(df_weekly_deaths_region, how='left', on=['regiao', 'week_number']) \
    .merge(df_weekly_deaths, how='left', on=['week_number'])
df_covid_saude_reg['percentage_deaths'] = df_covid_saude_reg['new_deaths_week_region'] / df_covid_saude_reg['new_deaths_week']

# Replace missing values (e.g. the 0 / 0 share of a week without deaths) with 0, but only in
# the computed numeric columns, so that 0 is never written into the key or date columns.
weekly_metric_cols = ['new_deaths_week_region', 'new_deaths_week', 'percentage_deaths']
df_covid_saude_reg[weekly_metric_cols] = df_covid_saude_reg[weekly_metric_cols].fillna(0)

In [None]:

# Write both app-facing tables to parquet without the index.
for frame, target_path in (
    (df_covid_saude_mun, '../data/app/covid_saude_obito_municipio.parquet'),
    (df_covid_saude_reg, '../data/app/covid_saude_obito_regiao.parquet'),
):
    frame.to_parquet(target_path, index=False)

In [None]:

# Download the GeoJSON with the Brazilian state boundaries.
brazil_states_geojson_url = (
    'https://raw.githubusercontent.com/codeforamerica/click_that_hood/'
    'master/public/data/brazil-states.geojson'
)

# The timeout makes a stalled connection fail with an error instead of blocking the
# notebook run indefinitely.
with urlopen(brazil_states_geojson_url, timeout=30) as response:
    Brasil = json.load(response)

# Give every feature an "id" equal to the state name and build a lookup from the state
# abbreviation ("sigla") to that id.
# NOTE(review): the original comment said this defines the chart information - presumably
# the id is the key a map chart matches on; confirm against the plotting code.
state_id_map = {}
for feature in Brasil["features"]:
    feature["id"] = feature["properties"]["name"]
    state_id_map[feature["properties"]["sigla"]] = feature["id"]

In [None]:
# Load the city table used by the app
est_cidade_input_path = '../data/app/est_cidade.parquet'
df_teste = pd.read_parquet(est_cidade_input_path)

In [None]:
# Drop exact duplicate rows, then rows with any missing value, and write the result back.
# NOTE: this overwrites the same file the table was loaded from.
df_est_cidade_clean = df_teste.drop_duplicates()
df_est_cidade_clean = df_est_cidade_clean.dropna()
df_est_cidade_clean.to_parquet('../data/app/est_cidade.parquet', index=False)