# Project 2 - Coronavirus

This project consist to estimate the SIR model with data from WHO.

In [1]:
import pandas as pd
import numpy as np
from scipy import integrate
from sklearn.metrics import mean_squared_error, mean_squared_log_error, mean_absolute_error, median_absolute_error
from scipy.integrate import odeint
from scipy.optimize import differential_evolution, minimize
import matplotlib.pyplot as plt
import PDEparams as pde
import array

## Data from World Health Organization
#### Only laboratory-confirmed, exclude clinically diagnose

In [None]:
data = pd.read_csv('CoV2019.csv')
china = data["China"]#data["China"][:27]
days = data["Days"]
total = data["Total"]
deaths_china = data["Death China"]
other = data["Other"]
china_total = data["China"]
days_total = data["Days"]
deaths_china_total = data["Death China"]
deaths_outside_total = data["Death Outside"]
len(deaths_china)
for i in range(len(days)-1):
    if other[i]+china[i]==total[i]:
        print('True')
    else:
        print('False')

In [None]:
count = 0
for a in data:
    print(a)
    count = count + 1
print('En total hay:', count, 'características.')

### Defining the model

We use a SIR model:

$$\begin{align}
\frac{\mathrm{d} S}{\mathrm{d} t} &= -\beta\, \frac{SI}{N}\\
\frac{\mathrm{d} I}{\mathrm{d} t} &= \beta\, \frac{SI}{N} - \gamma\,I\\
\frac{\mathrm{d} R}{\mathrm{d} t} &= \gamma\,I
\end{align}$$

Susceptible -> Infected -> Recovered

$$\begin{align}
\beta &= \text{Contact Rate } \times \text{ Probability of Transmission}\\
\sigma &= \text{Incubation Rate}\\
\gamma &= \text{Recovery Rate}
\end{align}$$

Incubation Period: 1-14 Days, most commonly 5 days (WHO)

In [None]:
Hubei = 5917*10**4
Guangdong = 11346*10**4
Henan = 9605*10**4
Zhejiang = 5737*10**4
Hunan = 6899*10**4
Anhui = 6324*10**4
Jiangxi = 4648*10**4
N = 56*10**3   # estimate of people affected by lock down
#init_I = 1
#init_R = 1
#init_S = 5917*10**4

We define our DE 

In [None]:
def SIR(z, t, be, gm):
    '''The input z corresponds to the current state of the system, z = [x, y]. Since the input is in 1D, no 
    pre-processing is needed.
    
    t is the current time.
    
    a and b correspond to the unknown parameters.
    '''
    
    S, I = z
    
    return [ be*(S*I)/N-gm*I, gm*I]

### Using `PDEparams` to estimate parameters

First, we load the data from the `.csv` file.

Then we build the dataframe with data we want

The columns are, in order: $S$, $I$, $R$.

In [None]:
data.head()

In [None]:
#lista de recuperados
R = []
S = []
I = []
# infectados
for i in range(len(china)):
    c=china[i]-deaths_china[i]
    I.append(c)
    S.append(Hubaei-c)
    R.append(deaths_china[i])


print(I)
print(S)
print(R)
print(len(S))
print(len(I))
print(len(R))

In [None]:
for i in range(len(china)):
    print(S[i]+I[i]+R[i])

In [71]:
dict = {'I': I,'S':S}
    
df = pd.DataFrame(dict) 

In [72]:
df.head()

Unnamed: 0,I,S
0,63,59169937
1,230,59169770
2,259,59169741
3,467,59169533
4,688,59169312


#### Constructing the `PDEmodel` object.

The inputs are

**Required:**
1. The data table `data`.
2. The model `SIR`.
3. The list of initial condition functions.
4. The bounds for the parameter values.

**Optional:**
1. The parameter names.
2. The number of variables: 2. **(Default is 1, this needs to be provided in this case)**
3. The number of spatial dimensions: 0. **(Default is 1, this needs to be provided in this case)**
4. The number of replicates in the data: 1. **(Default is 1, this needs to be provided in this case)**
5. The indices of the measured variables. In this case, the default `None`, since we have data for all 2 variables.
6. The function to apply to the output. In this case, the default `None`, since our data is directly $x$ and $y$.

In [73]:
def init_I():
    return 1

def init_R():
    return Hubei

In [74]:
my_model = pde.PDEmodel(df, SIR, [init_I, init_R], bounds=[(0, 1), (0,1)], 
                        param_names=[r'$be$', r'$gm$'], nvars=2, ndims=0, 
                        nreplicates=1, obsidx=None, outfunc=None)

In [75]:
my_model.initial_condition

array([       1, 59170000])

In [76]:
%%time
my_model.fit()

       $be$      $gm$
0  0.697378  0.803848
Wall time: 3min 46s


In [77]:
my_model.best_params

Unnamed: 0,$be$,$gm$
0,0.697378,0.803848


In [78]:
my_model.best_error

inf

In [17]:
%%time
my_model.likelihood_profiles()

HBox(children=(IntProgress(value=0, description='parameters', max=2, style=ProgressStyle(description_width='in…

HBox(children=(IntProgress(value=0, description='values within parameters', style=ProgressStyle(description_wi…

KeyboardInterrupt: 

In [18]:
my_model.result_profiles

AttributeError: 'PDEmodel' object has no attribute 'result_profiles'