# Project 2 - Coronavirus

This project consist to estimate the SIR model with data from WHO.

In [1]:
import pandas as pd
import numpy as np
from scipy import integrate
from sklearn.metrics import mean_squared_error, mean_squared_log_error, mean_absolute_error, median_absolute_error
from scipy.integrate import odeint
from scipy.optimize import differential_evolution, minimize
import matplotlib.pyplot as plt
import PDEparams as pde
import array

## Data from World Health Organization
#### Only laboratory-confirmed, exclude clinically diagnose

In [2]:
data = pd.read_csv('CoV2019.csv')
china = data["China"]#data["China"][:27]
days = data["Days"]
total = data["Total"]
deaths_china = data["Death China"]
other = data["Other"]
china_total = data["China"]
days_total = data["Days"]
deaths_china_total = data["Death China"]
deaths_outside_total = data["Death Outside"]
len(deaths_china)
for i in range(len(days)-1):
    if other[i]+china[i]==total[i]:
        print('True')
    else:
        print('False')

True
False
False
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


In [3]:
count = 0
for a in data:
    print(a)
    count = count + 1
print('En total hay:', count, 'características.')

Date of report
Days
Total
China
Death China
Other
Death Outside
Death Globally
En total hay: 8 características.


### Defining the model

We use a SIR model:

$$\begin{align}
\frac{\mathrm{d} S}{\mathrm{d} t} &= -\beta\, \frac{SI}{N}\\
\frac{\mathrm{d} I}{\mathrm{d} t} &= \beta\, \frac{SI}{N} - \gamma\,I\\
\frac{\mathrm{d} R}{\mathrm{d} t} &= \gamma\,I
\end{align}$$

Susceptible -> Infected -> Recovered

$$\begin{align}
\beta &= \text{Contact Rate } \times \text{ Probability of Transmission}\\
\sigma &= \text{Incubation Rate}\\
\gamma &= \text{Recovery Rate}
\end{align}$$

Incubation Period: 1-14 Days, most commonly 5 days (WHO)

In [4]:
Hubei = 5917*10**4
Guangdong = 11346*10**4
Henan = 9605*10**4
Zhejiang = 5737*10**4
Hunan = 6899*10**4
Anhui = 6324*10**4
Jiangxi = 4648*10**4
N = 56*10**3   # estimate of people affected by lock down
#init_I = 1
#init_R = 1
#init_S = 5917*10**4

We define our DE 

In [67]:
def SIR(z, t, be, gm):
    '''The input z corresponds to the current state of the system, z = [x, y]. Since the input is in 1D, no 
    pre-processing is needed.
    
    t is the current time.
    
    a and b correspond to the unknown parameters.
    '''
    
    S, I = z
    
    return [ be*(S*I)/N-gm*I, gm*I]

### Using `PDEparams` to estimate parameters

First, we load the data from the `.csv` file.

Then we build the dataframe with data we want

The columns are, in order: $S$, $I$, $R$.

In [68]:
data.head()

Unnamed: 0,Date of report,Days,Total,China,Death China,Other,Death Outside,Death Globally
0,01/20/20,21,282,278,6,4,0,6
1,01/21/20,22,346,341,6,4,0,6
2,01/23/20,24,581,571,17,7,0,17
3,01/24/20,25,846,830,25,11,0,25
4,01/25/20,26,1320,1297,41,23,0,41


In [79]:
#lista de recuperados
R = []
#R.append(0)
#normalizar datos (empezar desde cero recuperados)
muertos_total=[]
muerto_dia = []
primer_muerto = data.loc[0,'Death China']
muerto_dia.append(0)
#normalizar la informacion sobre los muertos
for d in deaths_china:
    muerto = d - primer_muerto
    muertos_total.append(muerto)

#Ver el número de muertos por dia
#print(range(len(muertos_total)-1)

#print(muerto_dia)

#for i in range(len(muerto_dia)-1):
 #   recuperados = muerto_dia[i]
    #print(recuperados)
  #  R.append(recuperados)
#S.append(Hubei)

lst = []
ent = data.loc[0,'China']
#normalizar datos (empezar desde cero infectados)
for a in china:
    ent2 = a - ent  
    lst.append(ent2)
#print(lst)
#print(len(china))

#Ver el número de infectados por dia


S = []
# Susceptibles
#lista de susceptibles 
for a in I:
    b = Hubei - a
    S.append(b)
    
for i in range(len(muertos_total)-1):
    muertos_dia = muertos_total[i+1]-muertos_total[i]
    R.append(muertos_dia)

    I = []
for i in range(len(lst)-1):
    infected_dia = lst[i+1]-lst[i]
    infected = infected_dia
    I.append(infected)
print(S)
print(I)
print(R)
print(len(S))
print(len(I))
print(len(R))

[59169937, 59169770, 59169741, 59169533, 59169312, 59169224, 59168224, 59168540, 59168261, 59168016, 59167899, 59167410, 59167173, 59166767, 59166108, 59166303, 59166849, 59166613, 59167347, 59167016, 59167527, 59167978, 59168180, 59168002, 59168494, 59168880, 59150539, 59168107, 59168248, 59169605, 59169106, 59169177, 59169350, 59169780, 59169482, 59169589, 59169561, 59169669, 59169567, 59169815]
[63, 230, 259, 467, 688, 776, 1776, 1460, 1739, 1984, 2101, 2590, 2827, 3233, 3892, 3697, 3151, 3387, 2653, 2984, 2473, 2022, 1820, 1998, 1506, 1120, 19461, 1893, 1752, 395, 894, 823, 650, 220, 518, 411, 439, 331, 433, 185]
[0, 11, 8, 16, 15, 24, 26, 26, 38, 43, 46, 45, 57, 64, 66, 73, 73, 86, 89, 97, 108, 97, 254, 13, 143, 142, 106, 98, 136, 115, 118, 109, 97, 150, 71, 52, 29, 44, 47, 35]
40
40
40


In [80]:
for i in range(len(lst)-1):
    print(S[i]+I[i]+R[i])

59170000
59170011
59170008
59170016
59170015
59170024
59170026
59170026
59170038
59170043
59170046
59170045
59170057
59170064
59170066
59170073
59170073
59170086
59170089
59170097
59170108
59170097
59170254
59170013
59170143
59170142
59170106
59170098
59170136
59170115
59170118
59170109
59170097
59170150
59170071
59170052
59170029
59170044
59170047
59170035


In [71]:
dict = {'I': I,'S':S}
    
df = pd.DataFrame(dict) 

In [72]:
df.head()

Unnamed: 0,I,S
0,63,59169937
1,230,59169770
2,259,59169741
3,467,59169533
4,688,59169312


#### Constructing the `PDEmodel` object.

The inputs are

**Required:**
1. The data table `data`.
2. The model `SIR`.
3. The list of initial condition functions.
4. The bounds for the parameter values.

**Optional:**
1. The parameter names.
2. The number of variables: 2. **(Default is 1, this needs to be provided in this case)**
3. The number of spatial dimensions: 0. **(Default is 1, this needs to be provided in this case)**
4. The number of replicates in the data: 1. **(Default is 1, this needs to be provided in this case)**
5. The indices of the measured variables. In this case, the default `None`, since we have data for all 2 variables.
6. The function to apply to the output. In this case, the default `None`, since our data is directly $x$ and $y$.

In [73]:
def init_I():
    return 1

def init_R():
    return Hubei

In [74]:
my_model = pde.PDEmodel(df, SIR, [init_I, init_R], bounds=[(0, 1), (0,1)], 
                        param_names=[r'$be$', r'$gm$'], nvars=2, ndims=0, 
                        nreplicates=1, obsidx=None, outfunc=None)

In [75]:
my_model.initial_condition

array([       1, 59170000])

In [76]:
%%time
my_model.fit()

       $be$      $gm$
0  0.697378  0.803848
Wall time: 3min 46s


In [77]:
my_model.best_params

Unnamed: 0,$be$,$gm$
0,0.697378,0.803848


In [78]:
my_model.best_error

inf

In [17]:
%%time
my_model.likelihood_profiles()

HBox(children=(IntProgress(value=0, description='parameters', max=2, style=ProgressStyle(description_width='in…

HBox(children=(IntProgress(value=0, description='values within parameters', style=ProgressStyle(description_wi…

KeyboardInterrupt: 

In [18]:
my_model.result_profiles

AttributeError: 'PDEmodel' object has no attribute 'result_profiles'