# Os dados

Lendo e visualizandos os dados do Brasil do COVID até 

In [1]:

import pandas as pd

# reading csv file  
BRdata = pd.read_csv("./PG_IMT/DadosEpidemia/CoVid19.csv") 
BRdata.head()


Unnamed: 0,date,country,state,city,newDeaths,deaths,newCases,totalCases,deathsMS,totalCasesMS,deaths_per_100k_inhabitants,totalCases_per_100k_inhabitants,deaths_by_totalCases,recovered,suspects,tests,tests_per_100k_inhabitants
0,2020-02-25,Brazil,SP,TOTAL,0,0,1,1,0,0,0.0,0.00218,0.0,,,,
1,2020-02-25,Brazil,TOTAL,TOTAL,0,0,1,1,0,0,0.0,0.00048,0.0,,,,
2,2020-02-26,Brazil,SP,TOTAL,0,0,0,1,0,1,0.0,0.00218,0.0,,,,
3,2020-02-26,Brazil,TOTAL,TOTAL,0,0,0,1,0,1,0.0,0.00048,0.0,,,,
4,2020-02-27,Brazil,SP,TOTAL,0,0,0,1,0,1,0.0,0.00218,0.0,,,,


## Filtrando e limpando os dados

In [2]:

is_SP =  BRdata['state']=='TOTAL'
SPdata = BRdata[is_SP]

# SPdata = SPdata.fillna(0)

SPLimpo = SPdata[SPdata.recovered.notnull()]
SPLimpo.head()


Unnamed: 0,date,country,state,city,newDeaths,deaths,newCases,totalCases,deathsMS,totalCasesMS,deaths_per_100k_inhabitants,totalCases_per_100k_inhabitants,deaths_by_totalCases,recovered,suspects,tests,tests_per_100k_inhabitants
321,2020-03-23,Brazil,TOTAL,TOTAL,9,34,358,1952,34,1891,0.01618,0.92887,0.01742,8.0,15867.0,,
349,2020-03-24,Brazil,TOTAL,TOTAL,13,47,303,2255,46,2201,0.02237,1.07306,0.02084,20.0,17700.0,,
377,2020-03-25,Brazil,TOTAL,TOTAL,12,59,311,2566,57,2433,0.02808,1.22105,0.02299,27.0,27227.0,,
405,2020-03-26,Brazil,TOTAL,TOTAL,18,77,424,2990,77,2915,0.03664,1.42281,0.02575,42.0,48793.0,,
433,2020-03-27,Brazil,TOTAL,TOTAL,16,93,486,3476,92,3417,0.04425,1.65408,0.02675,42.0,50684.0,,


In [3]:

from datetime import datetime

first_date = SPLimpo["date"].iloc[0]
first_date = datetime.fromisoformat(first_date)

SPsir = SPLimpo[[ "totalCases", "deaths","recovered"]].to_numpy()
# SPsir = SPdata[[ "totalCases", "deaths","recovered"]].to_numpy()

SPsir[:,0:]
SPI = SPsir[:,0]
auxM = SPsir[:,1]
SPM = auxM
auxR = SPsir[:,2]
SPR = auxR


## Visualizando a evolução

In [4]:

from bokeh.models   import Legend, ColumnDataSource, RangeTool, LinearAxis, Range1d, HoverTool
from bokeh.palettes import brewer, Inferno256
from bokeh.plotting import figure, show
from bokeh.layouts  import column
from bokeh.io       import output_notebook

output_notebook()

from datetime import timedelta

# Criando o vetor de tempo
date_vec = [ first_date + timedelta(days=k) for k in range(len(SPM))]

# Criando os valores para legenda no plot
year =  [str(int(d.year)) for d in date_vec ]
month = [("0"+str(int(d.month)))[-2:] for d in date_vec ]
day =   [("0"+str(int(d.day)))[-2:] for d in date_vec ]

# Criando a fonte de dados
source = ColumnDataSource(data={
    'Data'       : date_vec,
    'd': day, 'm': month, 'y': year,
    'Infectados' : SPI,
    'Removidos'  : SPR,
    'Mortes'     : SPM,
})


# Criando a figura
p = figure(plot_height=500,
           plot_width=600,
           x_axis_type="datetime", 
           tools="", 
           toolbar_location=None,
           y_axis_type="log",
           title="Evolução do COVID - Brasil")

# Preparando o estilo
p.grid.grid_line_alpha = 0
p.ygrid.band_fill_color = "olive"
p.ygrid.band_fill_alpha = 0.1
p.yaxis.axis_label = "Indivíduos"
p.xaxis.axis_label = "Dias"

# Incluindo as curvas
i_p = p.line(x='Data', y='Infectados', legend_label="Infectados", line_cap="round", line_width=3, color="#ffd885", source=source)
m_p = p.line(x='Data', y='Mortes', legend_label="Mortes", line_cap="round", line_width=3, color="#de425b", source=source)
r_p = p.line(x='Data', y='Removidos', legend_label="Removidos", line_cap="round", line_width=3, color="#99d594", source=source)

# Colocando as legendas
p.legend.click_policy="hide"
p.legend.location = "bottom_right"

# Incluindo a ferramenta de hover
p.add_tools(HoverTool(
    tooltips=[
        ( 'Indivíduos', '$y{0.00 a}'),
        ( 'Data',       '@d/@m/@y' ),
    ],
    renderers=[
        i_p, m_p, r_p
    ]
))

show(p)

# O problema

O conjunto de equações diferenciais que caracteriza o modelo é descrito abaixo. No modelo $\beta - \text{representa a taxa de transmissão ou taxa efetiva de contato} $  e $r - \text{a taxa de remoção ou recuperação.}$ 


$$ \begin{split}
   \frac{dS(t)}{dt} & = -\beta S(t) I(t) \\
   \frac{dI(t)}{dt} & = \beta S(t) I(t) - rI(t)  \\
   \frac{dR(t)}{dt} & = r I(t)
   \end{split}$$

   Gostaríamos de identificar quais parâmetros $\beta$ e $r$ resultam num melhor ajuste do modelo para os dados de **S**,**I** e **R**

In [5]:

# Tamanho da população - N
N = 220e6

# Importando o modelo SIR
from models import *

sir_model = ss.SIR()


# Estimando os parâmetros

Para estimarmos os parâmetros do modelo $\mathbf{\beta}$ e $\mathbf{r}$, vamos utilizar  inicialmente o método de mínimos quadrados. Podemos então formular o problema a partir da Equação abaixo. Na Equação $y_m(k)$ representa o dado real em cada amostra $k$; $y_s(\theta,k)$ representa o **valor estimado** a partir da simulação do modelo para uma determinada amostra $k$ e $\theta$ representa o vetor ed parâmetros $\theta = [ \beta \; \; r]^T$. 

$$ min_{\theta}= \sum_{k=1}^{K}(y_m(k) - y_s(\theta,k))^2 $$

A equação formula a pergunta: quais os valores de $beta$ e $r$ que minizam o erro quadrático quando comparados com os dados reais.

In [6]:

import numpy as np

SPS = N - SPI - SPR

time = np.linspace(0, len(SPI), len(SPI))

# Estimando os parâmetros
sir_model.fit(SPS, SPI, SPR, time, beta_sens=[1000,1], r_sens=[100,10])


	 ├─ S(0) ─   219998040.0   I(0) ─   1952.0   R(0) ─   8.0
	 ├─ beta ─   4.545495041683099e-09   r ─   0.024390243902439025
	 ├─ beta bound ─   4.545495041683098e-12  ─  4.545495041683099e-09
	 ├─ r bound ─   0.00024390243902439024  ─  0.24390243902439024
	 ├─ equation weights ─   [0.0004435994066128953, 1, 1]
	 └─ Defined at:  5.381092295930799e-10  ─  0.01996114877649821 



In [7]:

initial = [SPS[0], SPI[0], SPR[0]]

results = sir_model.predict(initial, time)


In [8]:

# Incluindo os dados de infectados
im_p = p.line(
    date_vec, results[1],
    legend_label="Infectados - Modelo",
    line_width=4,
    line_dash="dashed",
    line_cap="round",
    color="#f57f17"
)

# Incluindo os dados de recuperados
rm_p = p.line(
    date_vec, results[2],
    legend_label="Removidos - Modelo",
    line_dash="dashed",
    line_width=4,
    line_cap="round",
    color="#1b5e20"
)

show(p)


# Predições utilizando o modelo

In [9]:

# Criando os valores de tempo para previsão - 70 dias
t_sim = np.linspace(0, len(SPsir) + 70, len(SPsir) + 70)
date_vec_sim = [first_date + timedelta(days=k) for k in t_sim]

# Prevendo para os valores selecionados
prediction = sir_model.predict(initial, t_sim)


# Criando o gráfico com as predições

# Criando os valores para legenda no plot
year_sim =  [str(int(d.year)) for d in date_vec_sim ]
month_sim = [("0"+str(int(d.month)))[-2:] for d in date_vec_sim ]
day_sim =   [("0"+str(int(d.day)))[-2:] for d in date_vec_sim ]

# Criando a fonte de dados
source = ColumnDataSource(data={
    'Data'       : date_vec,
    'd': day, 'm': month, 'y': year,
    'Infectados' : SPI,
    'Removidos'  : SPR,
    'Mortes'     : SPM,
    'InfecModelo' : prediction[-1],
    'RemovModelo' : prediction[-2],
    'DataModelo'  : date_vec_sim,
    'ds': day_sim, 'ms': month_sim, 'ys': year_sim
})


# Criando a figura
p = figure(plot_height=500,
           plot_width=600,
           x_axis_type="datetime", 
           tools="", 
           toolbar_location=None,
           y_axis_type="log",
           title="Previsão do COVID - Brasil")

# Preparando o estilo
p.grid.grid_line_alpha = 0
p.ygrid.band_fill_color = "olive"
p.ygrid.band_fill_alpha = 0.1
p.yaxis.axis_label = "Indivíduos"
p.xaxis.axis_label = "Dias"

# Incluindo as curvas
i_p = p.line(x='Data', y='Infectados', legend_label="Infectados", line_cap="round", line_width=3, color="#ffd885", source=source)
m_p = p.line(x='Data', y='Mortes', legend_label="Mortes", line_cap="round", line_width=3, color="#de425b", source=source)
r_p = p.line(x='Data', y='Removidos', legend_label="Removidos", line_cap="round", line_width=3, color="#99d594", source=source)

mp_p = p.line(x='DataModelo', y='InfecModelo', legend_label="Mortes", line_dash="dashed", line_cap="round", line_width=4, color="#f57f17", source=source)
rp_p = p.line(x='DataModelo', y='RemovModelo', legend_label="Removidos", line_dash="dashed", line_cap="round", line_width=4, color="#1b5e20", source=source)

# Colocando as legendas
p.legend.click_policy="hide"
p.legend.location = "bottom_right"

# Incluindo a ferramenta de hover
p.add_tools(HoverTool(
    tooltips=[
        ( 'Indivíduos', '$y{0.00 a}' ),
        ( 'Data',       '@ds/@ms/@ys'),
    ],
    renderers=[
        i_p, m_p, r_p, mp_p, rp_p
    ]
))

show(p)




# Referências


- [Predictive Monitoring of COVID-19](https://ddi.sutd.edu.sg)

- [Apple mobility data](https://www.apple.com/covid19/mobility)

- [Corona Virus - Brazil Data](https://painel.covid19br.org)

- [Fitting model to Corona Virus](https://towardsdatascience.com/infectious-disease-modelling-fit-your-model-to-coronavirus-data-2568e672dbc7)