# Description
This notebook aims to operationalize the concepts learned in the course [Análise de série temporal: COVID-19](https://cursos.alura.com.br/course/analise-serie-temporal-covid-19)

# Setup

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
import datetime as date

# Datasets

In [None]:
# Source CSV with the per-state case/death records used throughout the notebook.
url_casos = "https://raw.githubusercontent.com/alura-cursos/COVID-Alura/main/cases-brazil-states.csv"
df_total = pd.read_csv(url_casos)
df_total.head()

Unnamed: 0,semana,data,pais,estado,cidade,novosObitos,Obitos,novosCasos,Casos,obitosMS,casosMS,obitos_por_100k,casos_por_100k,obitos_por_casos,recuperados,suspeitos,testes,testes_por_100k
0,9,2020-02-25,Brazil,SP,TOTAL,0,0,1,1,0,0,0.0,0.00218,0.0,,,,
1,9,2020-02-25,Brazil,TOTAL,TOTAL,0,0,1,1,0,0,0.0,0.00048,0.0,,,,
2,9,2020-02-26,Brazil,SP,TOTAL,0,0,0,1,0,1,0.0,0.00218,0.0,,,,
3,9,2020-02-26,Brazil,TOTAL,TOTAL,0,0,0,1,0,1,0.0,0.00048,0.0,,,,
4,9,2020-02-27,Brazil,SP,TOTAL,0,0,0,1,0,1,0.0,0.00218,0.0,,,,


# Analysis

In [None]:
# Drop the nationwide 'TOTAL' rows so only individual states remain.
# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of df_total (which raised SettingWithCopyWarning).
df = df_total[df_total['estado'] != 'TOTAL'].copy()
# Parse the date strings and keep only the calendar date.
df['data'] = pd.to_datetime(df['data']).dt.date
df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['data'] = pd.to_datetime(df['data']).dt.date


Unnamed: 0,semana,data,pais,estado,cidade,novosObitos,Obitos,novosCasos,Casos,obitosMS,casosMS,obitos_por_100k,casos_por_100k,obitos_por_casos,recuperados,suspeitos,testes,testes_por_100k
0,9,2020-02-25,Brazil,SP,TOTAL,0,0,1,1,0,0,0.0,0.00218,0.0,,,,
2,9,2020-02-26,Brazil,SP,TOTAL,0,0,0,1,0,1,0.0,0.00218,0.0,,,,
4,9,2020-02-27,Brazil,SP,TOTAL,0,0,0,1,0,1,0.0,0.00218,0.0,,,,
6,9,2020-02-28,Brazil,SP,TOTAL,0,0,1,2,0,1,0.0,0.00436,0.0,,,,
8,9,2020-02-29,Brazil,SP,TOTAL,0,0,0,2,0,2,0.0,0.00436,0.0,,,,


In [None]:
# Deaths ('Obitos') over time, one line per state.
fig = px.line(df, x='data', y='Obitos', color='estado', template = 'seaborn',
              width = 800,
              title = 'Óbitos por Estado em Função do Tempo')
# The x axis carries the dates and the y axis the death counts, so the axis
# titles must be assigned in that order (they were previously swapped).
fig.update_layout({'title':{'font': {'size': 24}, 'x': 0.09},
                   'xaxis':{'title': {'text': 'Data', 'font':{'size': 24}}},
                   'yaxis':{'title': {'text': 'Óbitos', 'font':{'size': 24}}}
                   })

A curva de óbitos aparenta apresentar um crescimento exponencial. Uma forma de confirmarmos se a curva realmente apresenta um crescimento exponencial é analisar seu comportamento em uma escala logarítmica; para tanto, vamos analisar apenas o estado de SP.

In [None]:
# Rows for the state of São Paulo only.
# .copy() makes df_sp an independent frame: later cells add columns to it, and
# doing that on a plain slice of df raises SettingWithCopyWarning.
df_sp = df[df['estado'] == 'SP'].copy()
df_sp.head()

Unnamed: 0,semana,data,pais,estado,cidade,novosObitos,Obitos,novosCasos,Casos,obitosMS,casosMS,obitos_por_100k,casos_por_100k,obitos_por_casos,recuperados,suspeitos,testes,testes_por_100k
0,9,2020-02-25,Brazil,SP,TOTAL,0,0,1,1,0,0,0.0,0.00218,0.0,,,,
2,9,2020-02-26,Brazil,SP,TOTAL,0,0,0,1,0,1,0.0,0.00218,0.0,,,,
4,9,2020-02-27,Brazil,SP,TOTAL,0,0,0,1,0,1,0.0,0.00218,0.0,,,,
6,9,2020-02-28,Brazil,SP,TOTAL,0,0,1,2,0,1,0.0,0.00436,0.0,,,,
8,9,2020-02-29,Brazil,SP,TOTAL,0,0,0,2,0,2,0.0,0.00436,0.0,,,,


In [None]:
# Deaths in SP over time on a logarithmic y axis.
fig = px.line(df_sp, x='data', y='Obitos', template = 'seaborn',
              title = 'Óbitos em Função do Tempo no estado de SP',
              width = 800,
              log_y=True)
# Dates are on x and deaths on y; the axis titles were previously swapped.
fig.update_layout({'title':{'font': {'size': 24}, 'x': 0.09},
                   'xaxis':{'title': {'text': 'Data', 'font':{'size': 24}}},
                   'yaxis':{'title': {'text': 'Óbitos', 'font':{'size': 24}}}
                   })

O shape côncavo da curva acima, na escala logarítmica, aponta que a curva de óbitos do estado de São Paulo de fato segue um comportamento exponencial. Dado que a curva ainda não atingiu um platô, é possível concluir que a taxa de crescimento continua aumentando.

In [None]:
# Deaths over time for every state (one line each) on a logarithmic y axis.
# The title used to say "no estado de SP" although all states are plotted.
fig = px.line(df, x='data', y='Obitos', color='estado',template = 'seaborn',
              title = 'Óbitos por Estado em Função do Tempo (escala logarítmica)',
              width = 800,
              log_y=True)
# Dates are on x and deaths on y; the axis titles were previously swapped.
fig.update_layout({'title':{'font': {'size': 24}, 'x': 0.09},
                   'xaxis':{'title': {'text': 'Data', 'font':{'size': 24}}},
                   'yaxis':{'title': {'text': 'Óbitos', 'font':{'size': 24}}}
                   })

Esse comportamento se verifica em praticamente todos os estados, embora alguns apresentem taxas diferentes

In [None]:
# List the column names available in the SP subset.
df_sp.columns

Index(['semana', 'data', 'pais', 'estado', 'cidade', 'novosObitos', 'Obitos',
       'novosCasos', 'Casos', 'obitosMS', 'casosMS', 'obitos_por_100k',
       'casos_por_100k', 'obitos_por_casos', 'recuperados', 'suspeitos',
       'testes', 'testes_por_100k'],
      dtype='object')

In [None]:
# Overlay the suspected, recovered and death counts for SP as semi-transparent bars.
colunas_sp = ['suspeitos', 'recuperados', 'Obitos']
rotulos_sp = {'Obitos': 'Óbitos',
              'suspeitos': 'Suspeitos',
              'recuperados': 'Recuperados'}
fig = px.bar(df_sp, x='data', y=colunas_sp,
             labels=rotulos_sp,
             template='none',
             barmode='overlay',
             opacity=0.8,
             width=800)
fig.update_layout(
    xaxis={'title': {'text': 'Data', 'font': {'size': 24}}},
    yaxis={'title': {'text': 'Óbitos, Recuperados e Suspeitos <br> no estado de SP', 'font': {'size': 22}}},
)
fig

O número de recuperados ficou constante por um bom período, dando um salto no dia 1º de julho; isso coincide com o dia em que o estado de SP passou a incluir o número de recuperados de casos leves.

In [None]:
def _aplicar_layout(fig, xlabel, ylabel):
  """Apply the title/axis styling shared by the plotting helpers and return the figure."""
  fig.update_layout({'title':{'font': {'size': 20}, 'x': 0.09},
                     'yaxis':{'title': {'text': ylabel, 'font':{'size': 20}}},
                     'xaxis':{'title': {'text': xlabel, 'font':{'size': 20}}}
                     })
  return fig

def plotar_linha(dataset, x, y, titulo, xlabel, ylabel, color=None):
  """Return an 800px-wide Plotly line chart with the notebook's standard layout.

  Parameters
  ----------
  dataset : data passed to ``px.line`` as its first argument.
  x, y : column name(s) forwarded to ``px.line``.
  titulo : chart title.
  xlabel, ylabel : axis titles.
  color : optional column forwarded to ``px.line`` as ``color``.
  """
  fig = px.line(dataset, x=x, y=y, color=color,template = 'none',
                title = titulo,
                width = 800)
  return _aplicar_layout(fig, xlabel, ylabel)

def plotar_barra(dataset, x, y, titulo, xlabel, ylabel, color=None):
  """Return an 800x400 overlaid Plotly bar chart with the notebook's standard layout.

  Parameters
  ----------
  dataset : data passed to ``px.bar`` as its first argument.
  x, y : column name(s) forwarded to ``px.bar``.
  titulo : chart title.
  xlabel, ylabel : axis titles.
  color : optional column forwarded to ``px.bar`` as ``color``.
  """
  fig = px.bar(dataset, x=x, y=y, color=color,template = 'none',
                title = titulo,
                barmode = 'overlay', opacity = 0.8,
                width = 800,
                height = 400)
  return _aplicar_layout(fig, xlabel, ylabel)

In [None]:
# Bar chart of the 'novosCasos' column for SP.
plotar_barra(dataset=df_sp, x='data', y='novosCasos',
             titulo='Novos casos em SP',
             xlabel='Data', ylabel='# Novos Casos',
             color=None)

In [None]:
# Row-to-row differences ("acceleration") of the daily series.
# assign() builds a new frame instead of writing into df_sp in place, so no
# SettingWithCopyWarning is raised even if df_sp is a slice of df.
# Note: 'recuperados' is differenced directly (there is no 'novos' column for it
# in this frame), unlike cases and deaths, which use their 'novos*' columns.
df_sp = df_sp.assign(
    aceleracaoCasos=df_sp['novosCasos'].diff(),
    aceleracaoObitos=df_sp['novosObitos'].diff(),
    aceleracaoRecuperados=df_sp['recuperados'].diff(),
)
df_sp.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,semana,data,pais,estado,cidade,novosObitos,Obitos,novosCasos,Casos,obitosMS,...,obitos_por_100k,casos_por_100k,obitos_por_casos,recuperados,suspeitos,testes,testes_por_100k,aceleracaoCasos,aceleracaoObitos,aceleracaoRecuperados
0,9,2020-02-25,Brazil,SP,TOTAL,0,0,1,1,0,...,0.0,0.00218,0.0,,,,,,,
2,9,2020-02-26,Brazil,SP,TOTAL,0,0,0,1,0,...,0.0,0.00218,0.0,,,,,-1.0,0.0,
4,9,2020-02-27,Brazil,SP,TOTAL,0,0,0,1,0,...,0.0,0.00218,0.0,,,,,0.0,0.0,
6,9,2020-02-28,Brazil,SP,TOTAL,0,0,1,2,0,...,0.0,0.00436,0.0,,,,,1.0,0.0,
8,9,2020-02-29,Brazil,SP,TOTAL,0,0,0,2,0,...,0.0,0.00436,0.0,,,,,-1.0,0.0,


In [None]:
# Bar chart of the day-over-day change in new deaths for SP.
plotar_barra(dataset=df_sp, x='data', y='aceleracaoObitos',
             titulo='Aceleração de Óbitos em SP',
             xlabel='Data', ylabel='# Aceleração')

Analisando a aceleração de novos óbitos diários, é possível ver que temos períodos de aceleração e desaceleração dos novos óbitos, porém o tamanho das barras positivas é sempre maior, o que contribui para o crescimento do número de novos óbitos no acumulado.
<br> Além disso, finais de semana e feriados possuem um padrão de subnotificação, o que causa uma sazonalidade nesses dados

In [None]:
# Bar chart of the row-to-row change in the 'recuperados' column for SP.
plotar_barra(dataset=df_sp, x='data', y='aceleracaoRecuperados',
             titulo='Aceleração de Recuperados em SP',
             xlabel='Data', ylabel='# Aceleração')

No gráfico acima, fizemos uma curva de aceleração em cima do número acumulado, ao invés de novos recuperados, dessa forma esse número nunca será negativo, o que não significa que não tivemos períodos com maior número ou menor número de novos recuperados.

## Média móvel

In [None]:
# 7- and 14-row trailing moving averages of new deaths.
# assign() returns a new frame rather than writing into df_sp in place, which
# avoids the SettingWithCopyWarning the column-by-column assignment produced.
df_sp = df_sp.assign(
    mediaObitos=df_sp['novosObitos'].rolling(window=7, center=False).mean(),
    mediaObitos_14=df_sp['novosObitos'].rolling(window=14, center=False).mean(),
)
df_sp.head(14)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,semana,data,pais,estado,cidade,novosObitos,Obitos,novosCasos,Casos,obitosMS,...,obitos_por_casos,recuperados,suspeitos,testes,testes_por_100k,aceleracaoCasos,aceleracaoObitos,aceleracaoRecuperados,mediaObitos,mediaObitos_14
0,9,2020-02-25,Brazil,SP,TOTAL,0,0,1,1,0,...,0.0,,,,,,,,,
2,9,2020-02-26,Brazil,SP,TOTAL,0,0,0,1,0,...,0.0,,,,,-1.0,0.0,,,
4,9,2020-02-27,Brazil,SP,TOTAL,0,0,0,1,0,...,0.0,,,,,0.0,0.0,,,
6,9,2020-02-28,Brazil,SP,TOTAL,0,0,1,2,0,...,0.0,,,,,1.0,0.0,,,
8,9,2020-02-29,Brazil,SP,TOTAL,0,0,0,2,0,...,0.0,,,,,-1.0,0.0,,,
10,10,2020-03-01,Brazil,SP,TOTAL,0,0,0,2,0,...,0.0,,,,,0.0,0.0,,,
12,10,2020-03-02,Brazil,SP,TOTAL,0,0,0,2,0,...,0.0,,,,,0.0,0.0,,0.0,
14,10,2020-03-03,Brazil,SP,TOTAL,0,0,0,2,0,...,0.0,,,,,0.0,0.0,,0.0,
16,10,2020-03-04,Brazil,SP,TOTAL,0,0,1,3,0,...,0.0,,,,,1.0,0.0,,0.0,
19,10,2020-03-05,Brazil,SP,TOTAL,0,0,3,6,0,...,0.0,,,,,2.0,0.0,,0.0,


In [None]:
# Grey bars with the raw daily deaths, used as a backdrop for the averages.
fig2 = px.bar(df_sp, x='data', y='novosObitos', template='none')
fig2.update_traces({'marker_color': 'lightgrey',
                    'name': 'novosObitos',
                    'legendgroup': 'novosObitos',
                    'showlegend': True})

# Lines with the two moving averages; the bar trace is then added on top of this figure.
fig1 = plotar_linha(dataset=df_sp, x='data', y=['mediaObitos', 'mediaObitos_14'],
                    titulo='Média móvel de óbitos em SP',
                    xlabel='Data', ylabel='Média')
fig1.add_trace(fig2.data[0])
fig1

## Autocorrelação

In [None]:
from pandas.plotting import autocorrelation_plot

In [None]:
#Autocorrplot
def plotly_autocorr(series, title = ''):
  """Build a Plotly version of pandas' autocorrelation plot.

  The autocorrelation curve and the threshold levels are computed by
  ``pandas.plotting.autocorrelation_plot`` on a throw-away Matplotlib axes and
  then redrawn with Plotly.

  Parameters
  ----------
  series : the series to analyse; passed unchanged to pandas.
  title : text used as the chart title (empty by default).

  Returns
  -------
  A Plotly figure with the autocorrelation curve, a zero line and the
  positive/negative threshold lines (dashed for the first level, solid for the
  second).
  """
  # Use a dedicated figure: the axes then holds only the lines pandas adds in
  # this call, and closing it cannot discard an unrelated figure that happened
  # to be current.
  mpl_fig, ax = plt.subplots()
  try:
    autocorrelation_plot(series, ax=ax)
    # NOTE(review): the indices rely on the order in which pandas adds its
    # lines (threshold lines first, autocorrelation curve at index 5) —
    # confirm when upgrading pandas.
    corr_x, corr_y = ax.lines[5].get_data()
    interval_1 = ax.lines[0].get_data()[1][0]
    interval_2 = ax.lines[1].get_data()[1][0]
  finally:
    plt.close(mpl_fig)

  fig = px.line(x = corr_x, y= corr_y, template='none', width=880)
  fig.update_yaxes(range=[-1,1], title='Autocorrelation')
  fig.update_xaxes(title='Lag')
  fig.update_layout({'title':{'text': title, 'font':{'size': 18}, 'x':0.09, 'y': 0.93}})
  # First threshold level (dashed), mirrored around zero.
  fig.add_hline(y = interval_1, line_dash='dash', line_width =1.5, opacity=1, line_color='grey')
  fig.add_hline(y = -interval_1, line_dash='dash', line_width =1.5, opacity=1, line_color='grey')
  # Zero reference line.
  fig.add_hline(y = 0, line_width =1.0, opacity=1, line_color='grey')
  # Second threshold level (solid), mirrored around zero.
  fig.add_hline(y = interval_2, line_width =1.5, opacity=1, line_color='grey')
  fig.add_hline(y = -interval_2, line_width =1.5, opacity=1, line_color='grey')
  return fig

In [None]:
# Autocorrelation of the 'Obitos' series for SP.
plotly_autocorr(series=df_sp['Obitos'], title="Correlação dos Óbitos")

Acima é possível perceber que a série de óbitos totais possui uma alta correlação com ela mesma com lags até 2 semanas, diminuindo a correlação a partir disso.

In [None]:
# Autocorrelation of the daily new-deaths series for SP.
plotly_autocorr(series=df_sp['novosObitos'], title="Correlação dos Novos Óbitos")

Analisando a série de novos óbitos, a mesma sazonalidade observada na série original também se faz presente na análise de autocorrelação, uma vez que é esperado que o número de óbitos de um dia de semana não se correlacione com o número de óbitos no final de semana, onde a notificação costuma ser menor.

<br> Uma forma de tentar ajustar essa sazonalidade é utilizando a média móvel

In [None]:
# Autocorrelation of the 7-row moving average of new deaths.
# dropna() removes exactly the leading rows the rolling window leaves empty;
# the previous hard-coded [7:] also discarded the first valid average and
# would silently go wrong if the window size changed.
plotly_autocorr(df_sp['mediaObitos'].dropna(), "Correlação dos Novos Óbitos (média móvel)")

Com as médias móveis, as oscilações decorrentes da sazonalidade não se fazem mais presentes

In [None]:
# Autocorrelation of the deaths "acceleration"; the first row is skipped
# because diff() leaves it empty.
plotly_autocorr(series=df_sp['aceleracaoObitos'].iloc[1:],
                title="AutoCorrelação da Aceleração de Óbitos")

Também é presente a sazonalidade de final de semana para a aceleração de óbitos.
<br> Conforme o tempo passa a autocorrelação da série cai, e os eventos deixam de ser dependentes ao longo do tempo.

In [None]:
# 7-row trailing moving average of the deaths "acceleration".
# assign() returns a new frame rather than writing into df_sp in place, which
# avoids the SettingWithCopyWarning the direct column assignment produced.
df_sp = df_sp.assign(
    aceleracaoObitosMediaMovel=df_sp['aceleracaoObitos'].rolling(window= 7, center=False).mean()
)
df_sp.head(10)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,semana,data,pais,estado,cidade,novosObitos,Obitos,novosCasos,Casos,obitosMS,...,recuperados,suspeitos,testes,testes_por_100k,aceleracaoCasos,aceleracaoObitos,aceleracaoRecuperados,mediaObitos,mediaObitos_14,aceleracaoObitosMediaMovel
0,9,2020-02-25,Brazil,SP,TOTAL,0,0,1,1,0,...,,,,,,,,,,
2,9,2020-02-26,Brazil,SP,TOTAL,0,0,0,1,0,...,,,,,-1.0,0.0,,,,
4,9,2020-02-27,Brazil,SP,TOTAL,0,0,0,1,0,...,,,,,0.0,0.0,,,,
6,9,2020-02-28,Brazil,SP,TOTAL,0,0,1,2,0,...,,,,,1.0,0.0,,,,
8,9,2020-02-29,Brazil,SP,TOTAL,0,0,0,2,0,...,,,,,-1.0,0.0,,,,
10,10,2020-03-01,Brazil,SP,TOTAL,0,0,0,2,0,...,,,,,0.0,0.0,,,,
12,10,2020-03-02,Brazil,SP,TOTAL,0,0,0,2,0,...,,,,,0.0,0.0,,0.0,,
14,10,2020-03-03,Brazil,SP,TOTAL,0,0,0,2,0,...,,,,,0.0,0.0,,0.0,,0.0
16,10,2020-03-04,Brazil,SP,TOTAL,0,0,1,3,0,...,,,,,1.0,0.0,,0.0,,0.0
19,10,2020-03-05,Brazil,SP,TOTAL,0,0,3,6,0,...,,,,,2.0,0.0,,0.0,,0.0


In [None]:
# Autocorrelation of the smoothed deaths "acceleration".
# dropna() removes exactly the leading rows left empty by diff() + rolling();
# the previous hard-coded [8:] also discarded the first valid value and would
# silently go wrong if the window size changed.
plotly_autocorr(df_sp['aceleracaoObitosMediaMovel'].dropna(), 'Autocorrelação da aceleração de óbitos (média móvel)')

## Sazonalidade

In [None]:
# 'mes' holds the full timestamp of each row (later cells read it under this
# name); 'dia_da_semana' is the English weekday name derived from it.
# assign() returns a new frame rather than writing into df_sp in place, which
# avoids the SettingWithCopyWarning the direct column assignments produced.
datas_sp = pd.to_datetime(df_sp['data'])
df_sp = df_sp.assign(mes=datas_sp, dia_da_semana=datas_sp.dt.day_name())
df_sp.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,semana,data,pais,estado,cidade,novosObitos,Obitos,novosCasos,Casos,obitosMS,...,testes,testes_por_100k,aceleracaoCasos,aceleracaoObitos,aceleracaoRecuperados,mediaObitos,mediaObitos_14,aceleracaoObitosMediaMovel,mes,dia_da_semana
0,9,2020-02-25,Brazil,SP,TOTAL,0,0,1,1,0,...,,,,,,,,,2020-02-25,Tuesday
2,9,2020-02-26,Brazil,SP,TOTAL,0,0,0,1,0,...,,,-1.0,0.0,,,,,2020-02-26,Wednesday
4,9,2020-02-27,Brazil,SP,TOTAL,0,0,0,1,0,...,,,0.0,0.0,,,,,2020-02-27,Thursday
6,9,2020-02-28,Brazil,SP,TOTAL,0,0,1,2,0,...,,,1.0,0.0,,,,,2020-02-28,Friday
8,9,2020-02-29,Brazil,SP,TOTAL,0,0,0,2,0,...,,,-1.0,0.0,,,,,2020-02-29,Saturday


In [None]:
# English weekday name -> Portuguese name, listed Sunday-first; the insertion
# order is reused elsewhere to order the weekdays in charts.
dias_traduzidos = dict(zip(
    ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'),
    ('Domingo', 'Segunda', 'Terça', 'Quarta', 'Quinta', 'Sexta', 'Sábado'),
))

In [None]:
# Translate the weekday names to Portuguese.
# The names are derived from the 'mes' timestamp rather than from the current
# 'dia_da_semana' values, so re-running this cell gives the same result
# (mapping the already-translated column a second time would yield NaN).
# assign() also avoids writing into df_sp in place (SettingWithCopyWarning).
df_sp = df_sp.assign(dia_da_semana=df_sp['mes'].dt.day_name().map(dias_traduzidos))
df_sp.head(10)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,semana,data,pais,estado,cidade,novosObitos,Obitos,novosCasos,Casos,obitosMS,...,testes,testes_por_100k,aceleracaoCasos,aceleracaoObitos,aceleracaoRecuperados,mediaObitos,mediaObitos_14,aceleracaoObitosMediaMovel,mes,dia_da_semana
0,9,2020-02-25,Brazil,SP,TOTAL,0,0,1,1,0,...,,,,,,,,,2020-02-25,Terça
2,9,2020-02-26,Brazil,SP,TOTAL,0,0,0,1,0,...,,,-1.0,0.0,,,,,2020-02-26,Quarta
4,9,2020-02-27,Brazil,SP,TOTAL,0,0,0,1,0,...,,,0.0,0.0,,,,,2020-02-27,Quinta
6,9,2020-02-28,Brazil,SP,TOTAL,0,0,1,2,0,...,,,1.0,0.0,,,,,2020-02-28,Sexta
8,9,2020-02-29,Brazil,SP,TOTAL,0,0,0,2,0,...,,,-1.0,0.0,,,,,2020-02-29,Sábado
10,10,2020-03-01,Brazil,SP,TOTAL,0,0,0,2,0,...,,,0.0,0.0,,,,,2020-03-01,Domingo
12,10,2020-03-02,Brazil,SP,TOTAL,0,0,0,2,0,...,,,0.0,0.0,,0.0,,,2020-03-02,Segunda
14,10,2020-03-03,Brazil,SP,TOTAL,0,0,0,2,0,...,,,0.0,0.0,,0.0,,0.0,2020-03-03,Terça
16,10,2020-03-04,Brazil,SP,TOTAL,0,0,1,3,0,...,,,1.0,0.0,,0.0,,0.0,2020-03-04,Quarta
19,10,2020-03-05,Brazil,SP,TOTAL,0,0,3,6,0,...,,,2.0,0.0,,0.0,,0.0,2020-03-05,Quinta


In [None]:
# Rounded mean of each deaths metric per weekday.
# The columns are selected with a list: selecting with bare comma-separated
# keys (an implicit tuple) is deprecated and rejected by newer pandas versions.
colunas_obitos = ['Obitos', 'novosObitos', 'aceleracaoObitos']
agrupados_sp = df_sp.groupby('dia_da_semana')[colunas_obitos].mean().round()
agrupados_sp.head()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0_level_0,Obitos,novosObitos,aceleracaoObitos
dia_da_semana,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Domingo,6846.0,60.0,-95.0
Quarta,6975.0,181.0,-25.0
Quinta,7165.0,190.0,9.0
Segunda,6887.0,41.0,-20.0
Sexta,7342.0,177.0,-13.0


In [None]:
# Expose the weekday index as a regular column so it can be used as a plot field.
agrupados_sp = agrupados_sp.assign(dias_da_semana=agrupados_sp.index)
agrupados_sp.head()

Unnamed: 0_level_0,Obitos,novosObitos,aceleracaoObitos,dias_da_semana
dia_da_semana,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Domingo,6846.0,60.0,-95.0,Domingo
Quarta,6975.0,181.0,-25.0,Quarta
Quinta,7165.0,190.0,9.0,Quinta
Segunda,6887.0,41.0,-20.0,Segunda
Sexta,7342.0,177.0,-13.0,Sexta


In [None]:
# Reorder the rows to follow the weekday order of the translation dict (Sunday first).
dias_semana_ordenados = [*dias_traduzidos.values()]
agrupados_sp = agrupados_sp.reindex(index=dias_semana_ordenados)
agrupados_sp

Unnamed: 0_level_0,Obitos,novosObitos,aceleracaoObitos,dias_da_semana
dia_da_semana,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Domingo,6846.0,60.0,-95.0,Domingo
Segunda,6887.0,41.0,-20.0,Segunda
Terça,6794.0,206.0,175.0,Terça
Quarta,6975.0,181.0,-25.0,Quarta
Quinta,7165.0,190.0,9.0,Quinta
Sexta,7342.0,177.0,-13.0,Sexta
Sábado,6786.0,155.0,-17.0,Sábado


In [None]:
# Mean new deaths per weekday, one coloured bar per day.
fig = px.bar(
    data_frame=agrupados_sp,
    x='dias_da_semana',
    y='novosObitos',
    color='dias_da_semana',
    template='none',
)
fig

In [None]:
# Distribution of daily new deaths per weekday, with the boxes in weekday order.
ordem_categorias = {'dia_da_semana': dias_semana_ordenados}
fig = px.box(
    data_frame=df_sp,
    x='dia_da_semana',
    y='novosObitos',
    template='none',
    category_orders=ordem_categorias,
)
fig

In [None]:
def criar_features(df, label=None):
  """Derive calendar features from the datetime column 'mes'.

  Works on a copy, so the frame passed in is left untouched.

  Parameters
  ----------
  df : frame with a datetime-typed 'mes' column.
  label : optional name of the target column.

  Returns
  -------
  A frame with 'diasemana' (day of week), 'mês' (month) and 'diamês' (day of
  month); when ``label`` is given, a tuple of that frame and a one-column
  frame holding the label.
  """
  quadro = df.copy()
  instantes = quadro['mes'].dt
  derivadas = (
      ('diasemana', instantes.dayofweek),
      ('mês', instantes.month),
      ('diamês', instantes.day),
  )
  for nome, valores in derivadas:
    quadro[nome] = valores

  features = quadro[['diasemana', 'mês', 'diamês']]
  if not label:
    return features
  return features, quadro[[label]]

In [None]:
# Calendar features (X) and the 'Obitos' column as label (y) for SP.
X, y = criar_features(df_sp, label='Obitos')

In [None]:
# Put the features and the label side by side in a single frame.
df_final = pd.concat((X, y), axis='columns')
df_final.head()

Unnamed: 0,diasemana,mês,diamês,Obitos
0,1,2,25,0
2,2,2,26,0
4,3,2,27,0
6,4,2,28,0
8,5,2,29,0


In [None]:
# Month is cast to string before plotting (presumably so Plotly treats it as a
# category when colouring — confirm).
df_chart = df_final.dropna()
df_chart = df_chart.assign(**{'mês': df_chart['mês'].astype(str)})

# One scatter figure per calendar feature, all coloured by month.
x_axis = ['diasemana', 'diamês', 'mês']
figs_list = [
    px.scatter(df_chart, x=coluna, y='Obitos', color='mês', template='none')
    for coluna in x_axis
]

# Copy every trace into a 1x3 grid; only the first panel keeps its legend
# entries, the other panels have theirs switched off.
fig0 = make_subplots(rows=1, cols=3)
for posicao, painel in enumerate(figs_list, start=1):
  if posicao > 1:
    painel.update_traces(showlegend=False)
  for traco in painel.data:
    fig0.add_trace(traco, row=1, col=posicao)

fig0.update_layout(
    title={'text': 'Óbitos por dia da semana, do mês e por mês', 'font': {'size': 20}},
    xaxis={'title': {'text': 'Dia da semana', 'font': {'size': 18}}},
    xaxis2={'title': {'text': 'Dia do mês', 'font': {'size': 18}}},
    xaxis3={'title': {'text': 'Mês', 'font': {'size': 18}}},
    yaxis={'title': {'text': 'Óbitos', 'font': {'size': 18}}},
    template='none',
    legend={'title': {'text': 'mês'}},
    width=800,
)
fig0