# Os dados

Lendo e visualizandos os dados do Brasil do COVID até 

In [1]:

import pandas as pd

# Lendo o arquivo csv
data_path = './PG_IMT/DadosEpidemia/CoVid19.csv'

BRdata = pd.read_csv(data_path) 
BRdata.head()

# Lendo os dados online
data_path = 'https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-states.csv'

SPdata = pd.read_csv(data_path, delimiter=",") 
SPdata.head()


Unnamed: 0,date,country,state,city,newDeaths,deaths,newCases,totalCases,deathsMS,totalCasesMS,deaths_per_100k_inhabitants,totalCases_per_100k_inhabitants,deaths_by_totalCases,recovered,suspects,tests,tests_per_100k_inhabitants
0,2020-02-25,Brazil,SP,TOTAL,0,0,1,1,0,0,0.0,0.00218,0.0,,,,
1,2020-02-25,Brazil,TOTAL,TOTAL,0,0,1,1,0,0,0.0,0.00048,0.0,,,,
2,2020-02-26,Brazil,SP,TOTAL,0,0,0,1,0,1,0.0,0.00218,0.0,,,,
3,2020-02-26,Brazil,TOTAL,TOTAL,0,0,0,1,0,1,0.0,0.00048,0.0,,,,
4,2020-02-27,Brazil,SP,TOTAL,0,0,0,1,0,1,0.0,0.00218,0.0,,,,


## Filtrando e limpando os dados

In [2]:

is_TOTAL = BRdata['state']=='TOTAL'
BRdata = BRdata[is_TOTAL]

BRLimpo = BRdata[BRdata.recovered.notnull()]
BRLimpo.head()



is_state = SPdata['state']=="SP"
SPdata = SPdata[is_state]

SPdata = SPdata[SPdata.recovered.notnull()]
SPdata.head()


Unnamed: 0,date,country,state,city,newDeaths,deaths,newCases,totalCases,deathsMS,totalCasesMS,deaths_per_100k_inhabitants,totalCases_per_100k_inhabitants,deaths_by_totalCases,recovered,suspects,tests,tests_per_100k_inhabitants
347,2020-03-24,Brazil,SP,TOTAL,10,40,65,810,40,810,0.08711,1.76397,0.04938,1.0,4572.0,,
375,2020-03-25,Brazil,SP,TOTAL,8,48,52,862,48,862,0.10453,1.87722,0.05568,1.0,4300.0,,
403,2020-03-26,Brazil,SP,TOTAL,10,58,191,1053,58,1052,0.12631,2.29317,0.05508,1.0,14312.0,,
431,2020-03-27,Brazil,SP,TOTAL,10,68,170,1223,68,1223,0.14809,2.66338,0.0556,1.0,14312.0,,
459,2020-03-28,Brazil,SP,TOTAL,16,84,183,1406,84,1406,0.18293,3.06191,0.05974,1.0,14312.0,,


In [3]:

import numpy as np
from datetime import datetime

only_SP = True

first_date = SPdata["date"].iloc[0]
first_date = datetime.fromisoformat(first_date)

if only_SP:
    
    N = 11869660
    
    I = SPdata["totalCases"].to_numpy()
    M = SPdata["deaths"].to_numpy()
    R = SPdata["recovered"].to_numpy()
    
else:
    
    N = 220e6

    SIR = BRLimpo[[ "totalCases", "deaths","recovered"]].to_numpy()

    I = SIR[:,0]
    M = SIR[:,1]
    R = SIR[:,2]


## Visualizando a evolução

In [4]:

from bokeh.models   import Legend, ColumnDataSource, RangeTool, LinearAxis, Range1d, HoverTool
from bokeh.palettes import brewer, Inferno256
from bokeh.plotting import figure, show
from bokeh.layouts  import column
from bokeh.io       import output_notebook

output_notebook()

from datetime import timedelta

# Criando o vetor de tempo
date_vec = [ first_date + timedelta(days=k) for k in range(len(M))]

# Criando os valores para legenda no plot
year =  [str(int(d.year)) for d in date_vec ]
month = [("0"+str(int(d.month)))[-2:] for d in date_vec ]
day =   [("0"+str(int(d.day)))[-2:] for d in date_vec ]

# Criando a fonte de dados
source = ColumnDataSource(data={
    'Data'       : date_vec,
    'd': day, 'm': month, 'y': year,
    'Infectados' : I,
    'Removidos'  : R,
    'Mortes'     : M,
})


# Criando a figura
p = figure(plot_height=500,
           plot_width=600,
           x_axis_type="datetime", 
           tools="", 
           toolbar_location=None,
           y_axis_type="log",
           title="Evolução do COVID - Brasil")

# Preparando o estilo
p.grid.grid_line_alpha = 0
p.ygrid.band_fill_color = "olive"
p.ygrid.band_fill_alpha = 0.1
p.yaxis.axis_label = "Indivíduos"
p.xaxis.axis_label = "Dias"

# Incluindo as curvas
i_p = p.line(x='Data', y='Infectados', legend_label="Infectados", line_cap="round", line_width=3, color="#ffd885", source=source)
m_p = p.line(x='Data', y='Mortes', legend_label="Mortes", line_cap="round", line_width=3, color="#de425b", source=source)
r_p = p.line(x='Data', y='Removidos', legend_label="Removidos", line_cap="round", line_width=3, color="#99d594", source=source)

# Colocando as legendas
p.legend.click_policy="hide"
p.legend.location = "top_left"

# Incluindo a ferramenta de hover
p.add_tools(HoverTool(
    tooltips=[
        ( 'Indivíduos', '$y{0.00 a}'),
        ( 'Data',       '@d/@m/@y' ),
    ],
    renderers=[
        i_p, m_p, r_p
    ]
))

show(p)

# O problema

O conjunto de equações diferenciais que caracteriza o modelo é descrito abaixo. No modelo $\beta - \text{representa a taxa de transmissão ou taxa efetiva de contato} $  e $r - \text{a taxa de remoção ou recuperação.}$ 


$$ \begin{split}
   \frac{dS(t)}{dt} & = -\beta S(t) I(t) \\
   \frac{dI(t)}{dt} & = \beta S(t) I(t) - rI(t)  \\
   \frac{dR(t)}{dt} & = r I(t)
   \end{split}$$

   Gostaríamos de identificar quais parâmetros $\beta$ e $r$ resultam num melhor ajuste do modelo para os dados de **S**,**I** e **R**

In [5]:

# Importando o modelo SIR
from models import *

sir_model = ss.SIR()


# Estimando os parâmetros

Para estimarmos os parâmetros do modelo $\mathbf{\beta}$ e $\mathbf{r}$, vamos utilizar  inicialmente o método de mínimos quadrados. Podemos então formular o problema a partir da Equação abaixo. Na Equação $y_m(k)$ representa o dado real em cada amostra $k$; $y_s(\theta,k)$ representa o **valor estimado** a partir da simulação do modelo para uma determinada amostra $k$ e $\theta$ representa o vetor ed parâmetros $\theta = [ \beta \; \; r]^T$. 

$$ min_{\theta}= \sum_{k=1}^{K}(y_m(k) - y_s(\theta,k))^2 $$

A equação formula a pergunta: quais os valores de $beta$ e $r$ que minizam o erro quadrático quando comparados com os dados reais.

In [6]:

import numpy as np

S = N - I - R

time = np.linspace(0, len(I), len(I))

# Estimando os parâmetros
sir_model.fit(S, I, None, time, beta_sens=[1000,1], r_sens=[100,10])

r_included = False


	 ├─ S(0) ─ I(0) ─ R(0) ─  [11868849.0, 810]
	 ├─ beta ─   8.42541682011457e-08   r ─   0.015384615384615385
	 ├─ beta bound ─   8.42541682011457e-11  ─  8.42541682011457e-08
	 ├─ r bound ─   0.00015384615384615385  ─  0.15384615384615385
	 ├─ equation weights ─   [0.00724729078615795, 1, 1]
	 └─ Defined at:  1.7959279357833352e-08  ─  0.13628588022674576 



In [7]:

if r_included:
    initial = [S[0], I[0], R[0]]
else:
    initial = [S[0], I[0]]
    
results = sir_model.predict(initial, time)


In [8]:

# Incluindo os dados de infectados
im_p = p.line(
    date_vec, results[1],
    legend_label="Infectados - Modelo",
    line_width=4,
    line_dash="dashed",
    line_cap="round",
    color="#f57f17"
)

# Incluindo os dados de recuperados
if r_included:
    rm_p = p.line(
        date_vec, results[2],
        legend_label="Removidos - Modelo",
        line_dash="dashed",
        line_width=4,
        line_cap="round",
        color="#1b5e20"
    )

show(p)


# Predições utilizando o modelo

In [9]:

# Criando os valores de tempo para previsão - 70 dias
t_sim = np.linspace(0, len(I) + 70, len(I) + 70)
date_vec_sim = [first_date + timedelta(days=k) for k in t_sim]

# Prevendo para os valores selecionados
prediction = sir_model.predict(initial, t_sim)



# Criando o gráfico com as predições

# Criando os valores para legenda no plot
year_sim =  [str(int(d.year)) for d in date_vec_sim ]
month_sim = [("0"+str(int(d.month)))[-2:] for d in date_vec_sim ]
day_sim =   [("0"+str(int(d.day)))[-2:] for d in date_vec_sim ]

# Criando a fonte de dados
if r_included:
    source = ColumnDataSource(data={
        'Data'       : date_vec,
        'd': day, 'm': month, 'y': year,
        'Infectados' : I,
        'Removidos'  : R,
        'Mortes'     : M,
        'InfecModelo' : prediction[1],
        'RemovModelo' : prediction[2],
        'DataModelo'  : date_vec_sim,
        'ds': day_sim, 'ms': month_sim, 'ys': year_sim
    })
else:
    source = ColumnDataSource(data={
        'Data'       : date_vec,
        'd': day, 'm': month, 'y': year,
        'Infectados' : I,
        'Removidos'  : R,
        'Mortes'     : M,
        'InfecModelo' : prediction[1],
        'DataModelo'  : date_vec_sim,
        'ds': day_sim, 'ms': month_sim, 'ys': year_sim
    })

    
# Criando a figura
p = figure(plot_height=500,
           plot_width=600,
           x_axis_type="datetime", 
           tools="", 
           toolbar_location=None,
           y_axis_type="log",
           title="Previsão do COVID - Brasil")

# Preparando o estilo
p.grid.grid_line_alpha = 0
p.ygrid.band_fill_color = "olive"
p.ygrid.band_fill_alpha = 0.1
p.yaxis.axis_label = "Indivíduos"
p.xaxis.axis_label = "Dias"

# Incluindo as curvas
i_p = p.line(x='Data', y='Infectados', legend_label="Infectados", line_cap="round", line_width=3, color="#ffd885", source=source)
m_p = p.line(x='Data', y='Mortes', legend_label="Mortes", line_cap="round", line_width=3, color="#de425b", source=source)
r_p = p.line(x='Data', y='Removidos', legend_label="Removidos", line_cap="round", line_width=3, color="#99d594", source=source)

mp_p = p.line(x='DataModelo', y='InfecModelo', legend_label="Infectados - Modelo", line_dash="dashed", line_cap="round", line_width=4, color="#f57f17", source=source)

renders = [i_p, m_p, r_p, mp_p]

if r_included:
    rp_p = p.line(x='DataModelo', y='RemovModelo', legend_label="Removidos - Modelo", line_dash="dashed", line_cap="round", line_width=4, color="#1b5e20", source=source)
    renders.append(rp_p)
    
# Colocando as legendas
p.legend.click_policy="hide"
p.legend.location = "bottom_right"

# Incluindo a ferramenta de hover
p.add_tools(HoverTool(
    tooltips=[
        ( 'Indivíduos', '$y{0.00 a}' ),
        ( 'Data',       '@ds/@ms/@ys'),
    ],
    renderers=renders
))

show(p)




# Referências


- [Predictive Monitoring of COVID-19](https://ddi.sutd.edu.sg)

- [Apple mobility data](https://www.apple.com/covid19/mobility)

- [Corona Virus - Brazil Data](https://painel.covid19br.org)

- [Fitting model to Corona Virus](https://towardsdatascience.com/infectious-disease-modelling-fit-your-model-to-coronavirus-data-2568e672dbc7)