# Os dados

Lendo e visualizando os dados de COVID-19 do Brasil fornecidos por:

- [Wesley Cota](https://github.com/wcota/covid19br)

Os dados são atualizados diariamente; portanto, os resultados deste pipeline podem mudar a cada nova execução.

In [1]:

import pandas as pd

# Source table: state-level COVID-19 cases for Brazil, read straight from the
# wcota GitHub repository (the remote file changes over time).
data_path = 'https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-states.csv'

# Plain comma-separated file; preview the first rows as the cell output.
online_data = pd.read_csv(data_path, sep=",")
online_data.head()


Unnamed: 0,date,country,state,city,newDeaths,deaths,newCases,totalCases,deathsMS,totalCasesMS,deaths_per_100k_inhabitants,totalCases_per_100k_inhabitants,deaths_by_totalCases,recovered,suspects,tests,tests_per_100k_inhabitants
0,2020-02-25,Brazil,SP,TOTAL,0,0,1,1,0,0,0.0,0.00218,0.0,,,,
1,2020-02-25,Brazil,TOTAL,TOTAL,0,0,1,1,0,0,0.0,0.00048,0.0,,,,
2,2020-02-26,Brazil,SP,TOTAL,0,0,0,1,0,1,0.0,0.00218,0.0,,,,
3,2020-02-26,Brazil,TOTAL,TOTAL,0,0,0,1,0,1,0.0,0.00048,0.0,,,,
4,2020-02-27,Brazil,SP,TOTAL,0,0,0,1,0,1,0.0,0.00218,0.0,,,,


## Filtrando e limpando os dados

In [2]:

# State code to analyse; "TOTAL" appears to be the aggregate row for the whole
# country in this table.
selected_state = "TOTAL"

# Keep only the rows of the selected state that have a 'recovered' value.
# Rows without it are dropped (not back-filled).
at_state = online_data['state'] == selected_state
has_recovered = online_data['recovered'].notnull()
local_data = online_data[at_state & has_recovered]

local_data.head()


Unnamed: 0,date,country,state,city,newDeaths,deaths,newCases,totalCases,deathsMS,totalCasesMS,deaths_per_100k_inhabitants,totalCases_per_100k_inhabitants,deaths_by_totalCases,recovered,suspects,tests,tests_per_100k_inhabitants
321,2020-03-23,Brazil,TOTAL,TOTAL,9,34,358,1952,34,1891,0.01618,0.92887,0.01742,8.0,15867.0,,
349,2020-03-24,Brazil,TOTAL,TOTAL,13,47,303,2255,46,2201,0.02237,1.07306,0.02084,20.0,17700.0,,
377,2020-03-25,Brazil,TOTAL,TOTAL,12,59,311,2566,57,2433,0.02808,1.22105,0.02299,27.0,27227.0,,
405,2020-03-26,Brazil,TOTAL,TOTAL,18,77,424,2990,77,2915,0.03664,1.42281,0.02575,42.0,48793.0,,
433,2020-03-27,Brazil,TOTAL,TOTAL,16,93,486,3476,92,3417,0.04425,1.65408,0.02675,42.0,50684.0,,


In [3]:

import numpy as np
from datetime import datetime

# Every series below starts at the second filtered row (np.diff needs the
# previous day), so at least two rows are required.
if len(local_data) < 2:
    raise ValueError(
        f"Not enough rows with 'recovered' data for state {selected_state!r}"
    )

# Date of the first modelled sample. R, M, nC and I(0) are all taken from
# row 1 onward, so the time origin is read from row 1 as well; reading it from
# row 0 would shift every plotted date one day early.
first_date = datetime.fromisoformat(local_data["date"].iloc[1])

# Population size N used by the model for each supported selection.
populations = {
    "SP": 44.01e6,     # alternative value left by the author: 11869660
    "TOTAL": 220e6,
}
if selected_state not in populations:
    # Fail here with a clear message instead of a NameError on N later on.
    raise ValueError(
        f"No population size configured for state {selected_state!r}; "
        f"known states: {sorted(populations)}"
    )
N = populations[selected_state]

R = local_data["recovered"].iloc[1:].to_numpy()   # R(t): 'recovered' column
M = local_data["newDeaths"].iloc[1:].to_numpy()   # M(t): 'newDeaths' column
nR = np.diff(local_data["recovered"].to_numpy())  # day-to-day change of 'recovered'
nC = local_data["newCases"].iloc[1:].to_numpy()   # 'newCases' column

# I(0): seeded with the 'totalCases' value of the first modelled row.
I = [local_data["totalCases"].iloc[1]]

# I(t) <- I(t-1) + newCases - newDeaths - newRecovered
# NOTE(review): I(0) already contains row 1's new cases, and the first update
# adds nC[0] (also row 1) again; the recursion may have been meant to use index
# t + 1. Left unchanged because it alters the fitted results - confirm intent.
for t in range(len(M) - 1):
    I.append(I[-1] + nC[t] - M[t] - nR[t])
I = np.array(I)


## Visualizando a evolução

In [4]:

from bokeh.models   import Legend, ColumnDataSource, RangeTool, LinearAxis, Range1d, HoverTool
from bokeh.palettes import brewer, Inferno256
from bokeh.plotting import figure, show
from bokeh.layouts  import column
from bokeh.io       import output_notebook

output_notebook()

from datetime import timedelta

# Time axis: one consecutive calendar day per sample of M, starting at first_date.
date_vec = [first_date + timedelta(days=k) for k in range(len(M))]

# Zero-padded date parts, used only by the hover tooltip.
year = [str(d.year) for d in date_vec]
month = [f"{d.month:02d}" for d in date_vec]
day = [f"{d.day:02d}" for d in date_vec]

# Data source shared by all curves.
source = ColumnDataSource(data={
    'Data'       : date_vec,
    'd': day, 'm': month, 'y': year,
    'Infectados' : I,
    'Removidos'  : R,
    'Mortes'     : M,
})

# The title follows the selected state instead of being hard-coded to
# "São Paulo", which was wrong whenever another state (e.g. "TOTAL") was chosen.
region_names = {"TOTAL": "Brasil", "SP": "São Paulo"}
region_label = region_names.get(selected_state, selected_state)

# Figure with a datetime x axis and a logarithmic y axis.
# NOTE(review): zero or negative values cannot be drawn on a log axis.
p = figure(plot_height=500,
           plot_width=600,
           x_axis_type="datetime",
           tools="",
           toolbar_location=None,
           y_axis_type="log",
           title=f"Evolução do COVID - {region_label}")

# Styling: no grid lines, light horizontal bands.
p.grid.grid_line_alpha = 0
p.ygrid.band_fill_color = "olive"
p.ygrid.band_fill_alpha = 0.1
p.yaxis.axis_label = "Indivíduos"
p.xaxis.axis_label = "Dias"

# One curve per series.
i_p = p.line(x='Data', y='Infectados', legend_label="Infectados", line_cap="round", line_width=3, color="#ffd885", source=source)
m_p = p.line(x='Data', y='Mortes', legend_label="Mortes", line_cap="round", line_width=3, color="#de425b", source=source)
r_p = p.line(x='Data', y='Removidos', legend_label="Removidos", line_cap="round", line_width=3, color="#99d594", source=source)

# Clicking a legend entry hides/shows its curve.
p.legend.click_policy="hide"
p.legend.location = "top_left"

# Hover tooltip showing the value under the cursor and the sample date.
p.add_tools(HoverTool(
    tooltips=[
        ( 'Indivíduos', '$y{i}'),
        ( 'Data',       '@d/@m/@y' ),
    ],
    renderers=[
        r_p, i_p, m_p
    ]
))

show(p)


# Reamostrando os dados



In [5]:

import scipy.signal as scs

# Number of interpolated samples generated per original sample.
upsample = 24

# Number of resampled points discarded at the end of each series.
# NOTE(review): presumably trims the region next to the mirror junction - confirm.
safe_reduce = 24

# Resample the infected series. The signal is concatenated with its own
# reversal before filtering, and only the first half of the result is kept.
Id_mirrored = np.concatenate((I, I[::-1]))
Id_expanded = scs.resample_poly(Id_mirrored,
                                len(Id_mirrored) * upsample,
                                len(Id_mirrored),
                                window=('kaiser', 35.0))

Id_resampled = Id_expanded[:len(Id_expanded) // 2 - safe_reduce]

# Resample the recovered series the same way.
Rd_mirrored = np.concatenate((R, R[::-1]))
Rd_expanded = scs.resample_poly(Rd_mirrored,
                                len(Rd_mirrored) * upsample,
                                len(Rd_mirrored),
                                window=('kaiser', 35.0))

Rd_resampled = Rd_expanded[:len(Rd_expanded) // 2 - safe_reduce]

# Resampled time vector, in units of original samples. Output sample k sits at
# time k / upsample, so the endpoint must be excluded: with the endpoint
# included the step would be len(I) / (n - 1) and the time axis would drift
# away from the original one (by up to one resampled step at the end).
n_half = len(Rd_expanded) // 2
td_resampled = np.linspace(0, len(I), n_half, endpoint=False)[:-safe_reduce]

# Names used by the following cells.
Id, Rd, td = Id_resampled, Rd_resampled, td_resampled
Sd = N - Id - Rd


In [9]:

# Sample index of the original (non-resampled) series.
t_bef = range(len(I))

p1 = figure(title="Resultados da reamostragem - COVID",
            plot_width=600,
            plot_height=500,
            tools="",
            toolbar_location=None)

# Resampled points: faint markers.
interp_style = dict(fill_alpha=0.1, line_alpha=0.1, size=5)
p1.scatter(td, Id, legend_label="Infectados - Interp",
           line_color="#f57f17", fill_color="#f57f17", **interp_style)
p1.scatter(td, Rd, legend_label="Removidos - Interp",
           line_color="#1b5e20", fill_color="#1b5e20", **interp_style)

# Original series: dashed lines drawn on top of the markers.
original_style = dict(line_dash="dashed", line_cap="round", line_width=3)
p1.line(t_bef, I, legend_label="Infectados", color="#ffd885", **original_style)
p1.line(t_bef, R, legend_label="Removidos", color="#99d594", **original_style)

# Clicking a legend entry hides/shows its glyph.
p1.legend.location = "top_left"
p1.legend.click_policy = "hide"

show(p1)


# Determinação do $R_0$ no tempo 

O conjunto de equações diferenciais que caracteriza o modelo é descrito abaixo. No modelo, $\beta$ representa a taxa de transmissão (ou taxa efetiva de contato) e $r$ representa a taxa de remoção (ou recuperação).


$$ \begin{split}
   \frac{dS(t)}{dt} & = -\beta S(t) I(t) \\
   \frac{dI(t)}{dt} & = \beta S(t) I(t) - rI(t)  \\
   \frac{dR(t)}{dt} & = r I(t)
   \end{split}$$

Gostaríamos de identificar quais parâmetros $\beta$ e $r$ resultam no melhor ajuste do modelo aos dados de **S**, **I** e **R** e, com isso, determinar a relação $R_0 = \beta / r$ para cada amostra incluída nos dados.

In [None]:

initial = 25  # <- minimum number of days used in the first fit
step = 0.25   # <- resolution step between consecutive fits, in days

# Resampled points per day; must agree with the factor used when Id/Rd/td
# were resampled (24 in the resampling cell).
samples_per_day = 24

# range() only accepts integers: step * 24 is the float 6.0 and raised
# TypeError, so the stride is converted explicitly (and kept >= 1).
stride = max(1, int(round(step * samples_per_day)))

study_data = dict(t=[], pars=[])
study_samples = range(samples_per_day * initial, len(Id), stride)

# Fit the model on a growing prefix of the data and record, for each prefix,
# its last time value and the fitted parameters.
for sample in study_samples:
    S_, I_ = Sd[:sample], Id[:sample]
    R_, t_ = Rd[:sample], td[:sample]

    # NOTE(review): `ss` is not imported in any visible cell; it must be the
    # module that provides the SIR model class - add its import at the top.
    model = ss.SIR(pop=N, verbose=False)
    model.fit(S_, I_, R_, t_, beta_sens=[1000,10], r_sens=[1000,10])

    study_data["t"].append(t_[-1])
    study_data["pars"].append(model.parameters)


# Avaliando o parâmetro $R_0$

In [None]:

def _parameter_figure(values, title, legend, color):
    """Return a small line figure of `values` against the study time axis."""
    fig = figure(plot_height=250,
                 plot_width=600,
                 tools="",
                 toolbar_location=None,
                 title=title)

    fig.line(study_data["t"], values, legend_label=legend,
             line_cap="round", line_width=3, color=color)

    # Same look as the other figures: no grid lines, light horizontal bands.
    fig.grid.grid_line_alpha = 0
    fig.ygrid.band_fill_color = "olive"
    fig.ygrid.band_fill_alpha = 0.1
    fig.xaxis.axis_label = "Tempo"
    return fig


# Each fitted parameter set is indexed as (beta, r); Ro is their ratio.
beta = [pars[0] for pars in study_data["pars"]]
r = [pars[1] for pars in study_data["pars"]]
Ro = [b / g for b, g in zip(beta, r)]

p1 = _parameter_figure(r, "Resultados do parâmetro r - COVID",
                       "r - parameter", "#ffd885")
p2 = _parameter_figure(beta, "Resultados do parâmetro beta - COVID",
                       "beta - parameter", "#99d594")
p3 = _parameter_figure(Ro, "Resultados do parâmetro Ro - COVID",
                       "Ro - parameter", "#99d594")

# Stack the three figures vertically.
show(column(p1, p2, p3))
