In [None]:
# OS library
import sys
sys.path.append('../')

import glob
import pandas as pd
import numpy as np

from urllib.request import urlopen
import json

# Magic to reload notebook
%load_ext autoreload
%autoreload 2
pd.options.display.max_columns = 100

# Covid Saúde

Painel de casos de doença pelo coronavírus 2019 (COVID-19) no Brasil pelo Ministério da Saúde ([website](https://covid.saude.gov.br/))

In [None]:
df_covid_saude = pd.read_parquet("../data/processed/covid_saude.parquet")
df_acoes = pd.read_csv('C:/Users/mscamargo/Desktop/estudos/my_proj/covid19_previsoes_municipios/data/raw/boletim_covid/acoes.csv', sep='\t', encoding='latin-1')
df_referencias = pd.read_csv('C:/Users/mscamargo/Desktop/estudos/my_proj/covid19_previsoes_municipios/data/raw/boletim_covid/referencias.csv', sep='\t', encoding='latin-1')
df_death_predictions = pd.read_csv('../data/raw/prediction_two_models.csv', encoding="ISO-8859-1")

In [None]:
df_death_predictions['data'] = pd.to_datetime(df_death_predictions['data'])

df_covid_saude_mun = df_covid_saude.loc[df_covid_saude['municipio'].notna()]

df_covid_saude_mun = df_covid_saude_mun[[
    'regiao', 'estado', 'municipio', 'codmun', 'nomeRegiaoSaude',
    'codRegiaoSaude', 'data', 'obitosNovos'
]]

df_covid_saude_mun['data'] = pd.to_datetime(df_covid_saude_mun['data'])

df_depara_wn_day = pd.concat([df_covid_saude_mun[['data']].drop_duplicates(), df_death_predictions[['data']].drop_duplicates()])
df_covid_saude_mun['data'] = pd.to_datetime(df_covid_saude_mun['data'])
df_depara_wn_day['weekday'] = df_depara_wn_day['data'].dt.weekday
df_depara_wn_day['week_number'] = 0
df_depara_wn_day.loc[df_depara_wn_day['weekday'] == 0, 'week_number'] = 1
df_depara_wn_day['groupby_col'] = 1
df_depara_wn_day['week_number'] = df_depara_wn_day.groupby(
    'groupby_col')['week_number'].cumsum()
df_depara_wn_day['week_number_day'] = df_depara_wn_day['week_number'] + df_depara_wn_day['weekday']/7 # used to plot
df_death_predictions = df_death_predictions.merge(df_depara_wn_day[['data', 'week_number_day']], how='left', on='data')

df_covid_saude_mun = df_covid_saude_mun.merge(
    df_depara_wn_day[['data', 'week_number']], how='left', on=['data'])

df_covid_saude_mun.sort_values(by=['municipio', 'data'],
                               ascending=[True, True],
                               inplace=True)
df_covid_saude_br = df_covid_saude_mun[['week_number',
                                        'data']].drop_duplicates(
                                            subset=['week_number'],
                                            keep='first')
df_covid_saude_reg = df_covid_saude_mun[['regiao', 'week_number',
                                         'data']].drop_duplicates(
                                             subset=['regiao', 'week_number'],
                                             keep='first')
df_covid_saude_state = df_covid_saude_mun[[
    'regiao', 'estado', 'week_number', 'data'
]].drop_duplicates(subset=['estado', 'week_number'], keep='first')
df_covid_saude_regsau = df_covid_saude_mun[[
    'estado', 'nomeRegiaoSaude', 'codRegiaoSaude', 'week_number', 'data'
]].drop_duplicates(subset=['nomeRegiaoSaude', 'codRegiaoSaude', 'week_number'],
                   keep='first')
df_covid_saude_city = df_covid_saude_mun[[
    'nomeRegiaoSaude', 'municipio', 'codmun', 'week_number', 'data'
]].drop_duplicates(subset=['municipio', 'codmun', 'week_number'], keep='first')

df_weekly_deaths_level_up_br = df_covid_saude_mun.groupby(
    ['week_number'])['obitosNovos'].sum().reset_index(name='new_deaths_week')
df_weekly_deaths_level_up_reg = df_covid_saude_mun.groupby(
    ['week_number'])['obitosNovos'].sum().reset_index(name='new_deaths_week')
df_weekly_deaths_level_up_state = df_covid_saude_mun.groupby(
    ['regiao', 'week_number'])['obitosNovos'].sum().reset_index(name='new_deaths_week')
df_weekly_deaths_level_up_regsau = df_covid_saude_mun.groupby(
    ['estado', 'week_number'])['obitosNovos'].sum().reset_index(name='new_deaths_week')
df_weekly_deaths_level_up_city = df_covid_saude_mun.groupby(
    ['nomeRegiaoSaude', 'week_number'])['obitosNovos'].sum().reset_index(name='new_deaths_week')

df_weekly_deaths_br = df_covid_saude_mun.groupby([
    'week_number'
])['obitosNovos'].sum().reset_index(name='new_deaths_week_division')
df_weekly_deaths_region = df_covid_saude_mun.groupby([
    'regiao', 'week_number'
])['obitosNovos'].sum().reset_index(name='new_deaths_week_division')
df_weekly_deaths_state = df_covid_saude_mun.groupby([
    'regiao', 'estado', 'week_number'
])['obitosNovos'].sum().reset_index(name='new_deaths_week_division')
df_weekly_deaths_regsau = df_covid_saude_mun.groupby([
    'estado', 'nomeRegiaoSaude', 'codRegiaoSaude', 'week_number'
])['obitosNovos'].sum().reset_index(name='new_deaths_week_division')
df_weekly_deaths_city = df_covid_saude_mun.groupby([
    'nomeRegiaoSaude', 'municipio', 'codmun', 'week_number'
])['obitosNovos'].sum().reset_index(name='new_deaths_week_division')

In [None]:
df_covid_saude_br = df_covid_saude_br.merge(df_weekly_deaths_br, how='left', on=['week_number']) \
    .merge(df_weekly_deaths_level_up_br, how='left', on=['week_number'])
df_covid_saude_reg = df_covid_saude_reg.merge(df_weekly_deaths_region, how='left', on=['regiao', 'week_number']) \
    .merge(df_weekly_deaths_level_up_reg, how='left', on=['week_number'])
df_covid_saude_state = df_covid_saude_state.merge(df_weekly_deaths_state, how='left', on=['regiao', 'estado', 'week_number']) \
    .merge(df_weekly_deaths_level_up_state, how='left', on=['regiao', 'week_number'])
df_covid_saude_regsau = df_covid_saude_regsau.merge(df_weekly_deaths_regsau, how='left', on=['estado', 'nomeRegiaoSaude', 'codRegiaoSaude', 'week_number']) \
    .merge(df_weekly_deaths_level_up_regsau, how='left', on=['estado', 'week_number'])
df_covid_saude_city = df_covid_saude_city.merge(df_weekly_deaths_city, how='left', on=['nomeRegiaoSaude', 'municipio', 'codmun', 'week_number']) \
    .merge(df_weekly_deaths_level_up_city, how='left', on=['nomeRegiaoSaude', 'week_number'])

df_covid_saude_br['percentage_deaths'] = df_covid_saude_br[
    'new_deaths_week_division'] / df_covid_saude_br['new_deaths_week']
df_covid_saude_reg['percentage_deaths'] = df_covid_saude_reg[
    'new_deaths_week_division'] / df_covid_saude_reg['new_deaths_week']
df_covid_saude_state['percentage_deaths'] = df_covid_saude_state[
    'new_deaths_week_division'] / df_covid_saude_state['new_deaths_week']
df_covid_saude_regsau['percentage_deaths'] = df_covid_saude_regsau[
    'new_deaths_week_division'] / df_covid_saude_regsau['new_deaths_week']
df_covid_saude_city['percentage_deaths'] = df_covid_saude_city[
    'new_deaths_week_division'] / df_covid_saude_city['new_deaths_week']

df_covid_saude_br.fillna(0, inplace=True)
df_covid_saude_reg.fillna(0, inplace=True)
df_covid_saude_state.fillna(0, inplace=True)
df_covid_saude_regsau.fillna(0, inplace=True)
df_covid_saude_city.fillna(0, inplace=True)

df_covid_saude_grouped = pd.concat([
    df_covid_saude_city, df_covid_saude_regsau, df_covid_saude_state,
    df_covid_saude_reg, df_covid_saude_br
])

##################################################################
# Tratamento de dados das noticias
##################################################################

df_acoes['texto'] = df_acoes['texto'].str.replace('\x93', '"')\
                                     .str.replace('\x94', '"')\
                                     .str.replace('\x91', "'")\
                                     .str.replace('\x92', "'")\
                                     .str.replace('\x85', '...')

df_acoes['data'] = pd.to_datetime(df_acoes['data'], format='%d/%m/%y')

df_acoes['texto'] = df_acoes['texto'].str.wrap(30)
df_acoes['texto'] = df_acoes['texto'].apply(lambda x: x.replace('\n', '<br>'))
df_acoes = df_acoes.merge(df_depara_wn_day, how='left', on='data')

df_acoes['noticia'] = '<b>Data: </b>' + df_acoes['data'].dt.strftime(
    '%d/%m/%Y') + '<br><b>Tipo: </b>' + df_acoes['tipo'].astype(
        str) + '<br><b>Notícia: </b>' + df_acoes['texto']

df_noticias = df_acoes.groupby([
    'week_number'
])['noticia'].apply(lambda x: '<br><br>'.join(x)).reset_index()

# df_noticias['noticia'] = '<div style="height:120px;width:120px;border:1px solid #ccc;font:16px/26px Georgia, Garamond, Serif;overflow:auto;">' + df_noticias['noticia'] + '</div>'
# df_noticias['noticia'] = '<p>' + df_noticias['noticia'] + '</p>'
df_covid_saude_grouped = df_covid_saude_grouped.merge(df_noticias,
                                                      how='left',
                                                      on='week_number')

df_covid_saude_grouped['noticia'] = df_covid_saude_grouped['noticia'].where(pd.notnull(df_covid_saude_grouped['noticia']), None)

In [1]:
# TREAT COVID DATA TO GET "TIPO" VARIABLE

In [None]:
import pandas as pd
df_acoes = pd.read_csv('C:/Users/mscamargo/Desktop/estudos/my_proj/covid19_previsoes_municipios/data/raw/boletim_covid/acoes.csv', sep='\t', encoding='latin-1')
df_referencias = pd.read_csv('C:/Users/mscamargo/Desktop/estudos/my_proj/covid19_previsoes_municipios/data/raw/boletim_covid/referencias.csv', sep='\t', encoding='latin-1')


In [None]:
df_acoes

Unnamed: 0,N,data,tipo,texto
0,1,2020-02-03,Atos normativos,Portaria 188 do Ministério da<br>Saúde declara...
1,2,2020-02-06,Atos normativos,Presidente sanciona a Lei<br>13.979 que dispõe...
2,3,2020-02-13,Atos normativos,Ministério da Saúde apresenta<br>Plano de Cont...
3,4,2020-03-07,Propaganda,"Presidente viaja à Flórida,<br>nos Estados Uni..."
4,5,2020-03-09,Propaganda,"Em entrevista ao apresentador<br>Ratinho, o Pr..."
...,...,...,...,...
249,246,2021-05-23,Propaganda,Em ato político que gerou<br>aglomeração no Ri...
250,247,2021-05-26,Atos de governo,O Ministro da Saúde exonerou o<br>Superintende...
251,248,2021-05-26,Propaganda,Em audiência pública na Câmara<br>dos Deputado...
252,249,2021-05-27,Atos de governo,"O Presidente e o Advogado-<br>Geral da União, ..."


In [None]:
df_acoes['data'] = pd.to_datetime(df_acoes['data'])
df_acoes['weekday'] = df_acoes['data'].dt.weekday
df_acoes['week_number'] = 0
df_acoes.loc[df_acoes['weekday'] == 0, 'week_number'] = 1
df_acoes['groupby_col'] = 1
df_acoes['week_number'] = df_acoes.groupby(
    'groupby_col')['week_number'].cumsum()
df_acoes['week_number_day'] = df_acoes['week_number'] + df_acoes['weekday']/7 # used to plot

In [None]:
df_acoes

Unnamed: 0,N,data,tipo,texto,weekday,week_number,groupby_col,week_number_day,noticia
0,1,2020-02-03,Atos normativos,Portaria 188 do Ministério<br>da<br>Saúde decl...,0,1,1,1.000000,<b>Data: </b>03/02/2020<br><b>Tipo: </b>Atos n...
1,2,2020-02-06,Atos normativos,Presidente sanciona a<br>Lei<br>13.979 que dis...,3,1,1,1.428571,<b>Data: </b>06/02/2020<br><b>Tipo: </b>Atos n...
2,3,2020-02-13,Atos normativos,Ministério da Saúde<br>apresenta<br>Plano de<b...,3,1,1,1.428571,<b>Data: </b>13/02/2020<br><b>Tipo: </b>Atos n...
3,4,2020-03-07,Propaganda,"Presidente viaja à<br>Flórida,<br>nos Estados<...",5,1,1,1.714286,<b>Data: </b>07/03/2020<br><b>Tipo: </b>Propag...
4,5,2020-03-09,Propaganda,"Em entrevista ao<br>apresentador<br>Ratinho, o...",0,2,1,2.000000,<b>Data: </b>09/03/2020<br><b>Tipo: </b>Propag...
...,...,...,...,...,...,...,...,...,...
249,246,2021-05-23,Propaganda,Em ato político que<br>gerou<br>aglomeração no...,6,41,1,41.857143,<b>Data: </b>23/05/2021<br><b>Tipo: </b>Propag...
250,247,2021-05-26,Atos de governo,O Ministro da Saúde exonerou<br>o<br>Superinte...,2,41,1,41.285714,<b>Data: </b>26/05/2021<br><b>Tipo: </b>Atos d...
251,248,2021-05-26,Propaganda,Em audiência pública na<br>Câmara<br>dos Deput...,2,41,1,41.285714,<b>Data: </b>26/05/2021<br><b>Tipo: </b>Propag...
252,249,2021-05-27,Atos de governo,O Presidente e o<br>Advogado-<br>Geral da Uniã...,3,41,1,41.428571,<b>Data: </b>27/05/2021<br><b>Tipo: </b>Atos d...


In [None]:
##################################################################
# Tratamento de dados das noticias
##################################################################

df_acoes['texto'] = df_acoes['texto'].str.replace('\x93', '"')\
                                     .str.replace('\x94', '"')\
                                     .str.replace('\x91', "'")\
                                     .str.replace('\x92', "'")\
                                     .str.replace('\x85', '...')

df_acoes['data'] = pd.to_datetime(df_acoes['data'], format='%d/%m/%y')

df_acoes['texto'] = df_acoes['texto'].str.wrap(30)
df_acoes['texto'] = df_acoes['texto'].apply(lambda x: x.replace('\n', '<br>'))
#df_acoes = df_acoes.merge(df_depara_wn_day, how='left', on='data')

df_acoes['noticia'] = '<b>Data: </b>' + df_acoes['data'].dt.strftime(
    '%d/%m/%Y') + '<br><b>Tipo: </b>' + df_acoes['tipo'].astype(
        str) + '<br><b>Notícia: </b>' + df_acoes['texto']

df_noticias = df_acoes.groupby([
    'data', 'tipo'
])[['noticia']].apply(lambda x: '<br><br>'.join(x)).reset_index()

# df_noticias['noticia'] = '<div style="height:120px;width:120px;border:1px solid #ccc;font:16px/26px Georgia, Garamond, Serif;overflow:auto;">' + df_noticias['noticia'] + '</div>'
# df_noticias['noticia'] = '<p>' + df_noticias['noticia'] + '</p>'


In [None]:
df = df_noticias[['data', 'tipo']].drop_duplicates()

In [None]:
df['tipo'].unique()

array(['1 Propaganda', 'Atos normativos', 'Propaganda', 'Atos de governo',
       '0 Propaganda', 'Atos de gestão', 'Atos de Governo'], dtype=object)

In [None]:
import numpy as np
df['tipo'] = np.where(df['tipo'] == '0 Propaganda', 'Propaganda', df['tipo'])
df['tipo'] = np.where(df['tipo'] == '1 Propaganda', 'Propaganda', df['tipo'])
df['tipo'] = np.where(df['tipo'] == 'Atos de governo', 'Atos de Governo', df['tipo'])

In [None]:
df['tipo_at'] = df.groupby(['data'])['tipo'].transform(lambda x: ','.join(x))
df_finally_not = df[['data','tipo_at']].drop_duplicates()

In [None]:
df_weekly_deaths = pd.read_parquet('C:/Users/mscamargo/Desktop/estudos/my_proj/covid19_previsoes_municipios/data/app/covid_saude_obito_grouped.parquet')


In [40]:
ok['tipo_at'].unique()

array([nan, 'Atos normativos,Propaganda', 'Atos de Governo',
       'Atos de Governo,Atos normativos,Propaganda',
       'Propaganda,Atos de Governo,Atos normativos,Propaganda',
       'Atos de Governo,Propaganda', 'Atos normativos',
       'Atos de gestão,Atos de Governo,Atos normativos', 'Propaganda',
       'Propaganda,Atos de Governo,Propaganda',
       'Propaganda,Atos de Governo,Atos de Governo,Atos normativos,Propaganda'],
      dtype=object)

In [33]:
ok = df_weekly_deaths.merge(df_finally_not, on = 'week_number', how = 'left')
ok.to_parquet('C:/Users/mscamargo/Desktop/estudos/my_proj/covid19_previsoes_municipios/data/app/covid_saude_obito_grouped_new.parquet', index=False)

In [None]:
df_covid_saude_grouped.to_parquet('../data/app/covid_saude_obito_grouped.parquet', index=False)

In [None]:
df_death_predictions.to_parquet('../data/app/death_predictions.parquet', index=False)

In [None]:
df_teste = pd.read_parquet('../data/app/est_cidade.parquet')

In [None]:
df_teste.drop_duplicates().dropna().to_parquet('../data/app/est_cidade.parquet', index=False)