In [None]:
# OS library
import sys
sys.path.append('../')

import glob
import pandas as pd
import numpy as np

# Magic to reload notebook
%load_ext autoreload
%autoreload 2
pd.options.display.max_columns = 100

# Covid Saúde

Painel de casos de doença pelo coronavírus 2019 (COVID-19) no Brasil pelo Ministério da Saúde ([website](https://covid.saude.gov.br/))

In [None]:
df_covid_saude = pd.read_parquet("../data/processed/covid_saude.parquet")
df_acoes = pd.read_csv('../data/raw/boletim_covid/acoes.csv', sep='\t', encoding='latin-1')
df_referencias = pd.read_csv('../data/raw/boletim_covid/referencias.csv', sep='\t', encoding='latin-1')

In [None]:
df_acoes['texto'] = df_acoes['texto'].str.replace('\x93', '"')\
                                     .str.replace('\x94', '"')\
                                     .str.replace('\x91', "'")\
                                     .str.replace('\x92', "'")\
                                     .str.replace('\x85', '...')

In [None]:
df_covid_saude_mun = df_covid_saude.loc[df_covid_saude['municipio'].notna()]

df_covid_saude_mun = df_covid_saude_mun[['regiao', 'municipio', 'data', 'obitosNovos']]

df_covid_saude_mun['data'] = pd.to_datetime(df_covid_saude_mun['data'])

df_acoes['data'] = pd.to_datetime(df_acoes['data'], format='%d/%m/%y')

df_acoes['texto'] = df_acoes['texto'].str.wrap(30)
df_acoes['texto'] = df_acoes['texto'].apply(lambda x: x.replace('\n', '<br>'))

df_covid_saude_mun = df_covid_saude_mun.merge(df_acoes, how='left', on='data')

df_depara_wn_day = df_covid_saude_mun[['data']].drop_duplicates()
df_depara_wn_day['weekday'] = df_depara_wn_day['data'].dt.weekday
df_depara_wn_day['week_number'] = 0
df_depara_wn_day.loc[df_depara_wn_day['weekday'] == 0, 'week_number'] = 1
df_depara_wn_day['gropuby_col'] = 1
df_depara_wn_day['week_number'] = df_depara_wn_day.groupby('gropuby_col')['week_number'].cumsum()
df_covid_saude_mun = df_covid_saude_mun.merge(df_depara_wn_day[['data', 'week_number']], how='left', on=['data'])

df_covid_saude_mun.sort_values(by=['municipio', 'data'], ascending=[True, True], inplace=True)
df_covid_saude_reg = df_covid_saude_mun[['regiao', 'week_number', 'data']].drop_duplicates(subset=['regiao', 'week_number'],
                                                                           keep='first')
df_weekly_deaths = df_covid_saude_mun.groupby(['week_number'])['obitosNovos'].sum().reset_index(name='new_deaths_week')
df_weekly_deaths_region = df_covid_saude_mun.groupby(['regiao', 'week_number'])['obitosNovos'].sum().reset_index(
    name='new_deaths_week_region')
df_covid_saude_reg = df_covid_saude_reg.merge(df_weekly_deaths_region, how='left', on=['regiao', 'week_number']) \
    .merge(df_weekly_deaths, how='left', on=['week_number'])
df_covid_saude_reg['percentage_deaths'] = df_covid_saude_reg['new_deaths_week_region'] / df_covid_saude_reg['new_deaths_week']
df_covid_saude_reg.fillna(0, inplace=True)

In [None]:
df_covid_saude_mun.to_parquet('../data/app/covid_saude_obito_municipio.parquet', index=False)
df_covid_saude_reg.to_parquet('../data/app/covid_saude_obito_regiao.parquet', index=False)

# Vacinação (processamento da base)

In [45]:
#Pacotes utilizados
import pandas as pd #formatação de bases, nome de colunas
from urllib.request import urlopen #abrir o url das features de estado
import json #abrir o json
import numpy as np #utilizo nas funções
from IPython.core.display import display, HTML #atributos do plotly
from ast import literal_eval #atributo para as funções
import plotly.graph_objects as go #objeto go para criar o mapa
import plotly #formatação dos maps
from plotly.offline import iplot, init_notebook_mode #atributos para abrir o mapa "offline"
import plotly.offline as off #abrir o mapa "offline"
import plotly.express as px #atributo do mapa

In [10]:
from pathlib import Path
import pandas as pd

data_dir = Path(r'C:\Users\mscamargo\Desktop\estudos\my_proj\covid19_previsoes_municipios\data\raw\vacinacao')

save = []
for parquet_file in data_dir.glob('*.parquet'):
    print(parquet_file)
    a = pd.read_parquet(parquet_file)
    a.columns = ['paciente_id', 'nrmunicipio','municipio', 'uf', 'data', 'desc' ]
    full_df = a.groupby(['uf',  'data'])['paciente_id'].count().reset_index()
    #print(full_df)
    save.append(full_df)


#full_df.to_csv('csv_file.csv')

C:\Users\mscamargo\Desktop\estudos\my_proj\covid19_previsoes_municipios\data\raw\vacinacao\vacinacao_AC.parquet
C:\Users\mscamargo\Desktop\estudos\my_proj\covid19_previsoes_municipios\data\raw\vacinacao\vacinacao_AL.parquet
C:\Users\mscamargo\Desktop\estudos\my_proj\covid19_previsoes_municipios\data\raw\vacinacao\vacinacao_AM.parquet
C:\Users\mscamargo\Desktop\estudos\my_proj\covid19_previsoes_municipios\data\raw\vacinacao\vacinacao_AP.parquet
C:\Users\mscamargo\Desktop\estudos\my_proj\covid19_previsoes_municipios\data\raw\vacinacao\vacinacao_BA.parquet
C:\Users\mscamargo\Desktop\estudos\my_proj\covid19_previsoes_municipios\data\raw\vacinacao\vacinacao_CE.parquet
C:\Users\mscamargo\Desktop\estudos\my_proj\covid19_previsoes_municipios\data\raw\vacinacao\vacinacao_DF.parquet
C:\Users\mscamargo\Desktop\estudos\my_proj\covid19_previsoes_municipios\data\raw\vacinacao\vacinacao_ES.parquet
C:\Users\mscamargo\Desktop\estudos\my_proj\covid19_previsoes_municipios\data\raw\vacinacao\vacinacao_GO.

In [12]:
full_df = pd.concat(save)

In [13]:
df = pd.read_csv('covid_cases_mun_obit.csv')
df = df.groupby('estado')['populacaoTCU2019'].sum().reset_index()

In [14]:
full_df = full_df.merge(df[['estado', 'populacaoTCU2019']], right_on = 'estado', left_on= "uf")

In [15]:
geo_caract = pd.read_csv('obito_table_uf.csv')
geo_caract['Latitude'] = geo_caract['latitude']
geo_caract['Longitude'] = geo_caract['longitude']
geo_caract = geo_caract[['uf', 'Estado',	'Latitude',	'Longitude']]

In [16]:
geo_caract

Unnamed: 0,uf,Estado,Latitude,Longitude
0,AC,Acre,-8.77,-70.55
1,AL,Alagoas,-9.62,-36.82
2,AM,Amazonas,1.41,-51.77
3,AP,Amapá,-3.47,-65.1
4,BA,Bahia,-13.29,-41.71
5,CE,Ceará,-5.2,-39.53
6,DF,Distrito Federal,-15.83,-47.86
7,ES,Espírito Santo,-19.19,-40.34
8,GO,Goiás,-15.98,-49.86
9,MA,Maranhão,-5.42,-45.44


In [17]:
df = full_df.merge(geo_caract, on = 'uf')

In [20]:
df['ind'] = df['paciente_id']/df['populacaoTCU2019']

In [21]:
df.to_parquet('C:/Users/mscamargo/Desktop/estudos/my_proj/covid19_previsoes_municipios/data/app/opendatasus_vacinacao.parquet', index=False)