# Project 2 - Coronavirus

This project estimates the parameters of an SIR model using data from the WHO.

In [2]:
import pandas as pd
import numpy as np
from scipy import integrate
from sklearn.metrics import mean_squared_error, mean_squared_log_error, mean_absolute_error, median_absolute_error
from scipy.integrate import odeint
from scipy.optimize import differential_evolution, minimize
import matplotlib.pyplot as plt
import PDEparams as pde

## Data from World Health Organization
#### Only laboratory-confirmed cases; clinically diagnosed cases are excluded

In [3]:
# Number of leading rows kept in the truncated series below; the *_total
# series keep every row of the file.
N_ROWS_USED = 27

data = pd.read_csv('CoV2019.csv')

# Truncated series: the first N_ROWS_USED rows of each column.
china = data["China"][:N_ROWS_USED]
days = data["Days"][:N_ROWS_USED]
total = data["Total"][:N_ROWS_USED]
deaths_china = data["Death China"][:N_ROWS_USED]

# Full-length series, exactly as stored in the file.
other = data["Other"]
china_total = data["China"]
days_total = data["Days"]
deaths_china_total = data["Death China"]
deaths_outside_total = data["Death Outside"]

In [15]:
# Iterating over a DataFrame yields its column labels: print each one and
# keep a running 1-based tally so `count` ends up as the number of columns.
count = 0  # stays 0 if the table has no columns
for count, a in enumerate(data, start=1):
    print(a)
print('En total hay:', count, 'características.')

Date of report
Days
Total
China
Death China
Other
Death Outside
Death Globally
En total hay: 8 características.


### Defining the model

We use a SIR model:

$$\begin{align}
\frac{\mathrm{d} S}{\mathrm{d} t} &= -\beta\, \frac{SI}{N}\\
\frac{\mathrm{d} I}{\mathrm{d} t} &= \beta\, \frac{SI}{N} - \gamma\,I\\
\frac{\mathrm{d} R}{\mathrm{d} t} &= \gamma\,I
\end{align}$$

Susceptible -> Infected -> Recovered

$$\begin{align}
\beta &= \text{Contact Rate } \times \text{ Probability of Transmission}\\
\sigma &= \text{Incubation Rate (not used in the SIR equations above)}\\
\gamma &= \text{Recovery Rate}
\end{align}$$

Incubation Period: 1-14 Days, most commonly 5 days (WHO)

In [11]:
# Per-province head counts (presumably resident populations -- TODO confirm
# the source and year). The values are the original "<n> * 10**4" products
# written out in full.
Hubei = 59_170_000
Guangdong = 113_460_000
Henan = 96_050_000
Zhejiang = 57_370_000
Hunan = 68_990_000
Anhui = 63_240_000
Jiangxi = 46_480_000

# Estimate of people affected by lock down.
# NOTE(review): this is 56 thousand, not 56 million -- confirm the intended
# order of magnitude.
N = 56_000

# Initial infected / recovered counts.
init_I = 1
init_R = 1

We define our system of differential equations (DE):

In [12]:
def SIR(z, t, be, gm):
    '''Right-hand side of the SIR system, in the ``f(state, t, *params)`` form.

    Implements

        dS/dt = -be * S * I / N
        dI/dt =  be * S * I / N - gm * I
        dR/dt =  gm * I

    Parameters
    ----------
    z : sequence of three numbers
        Current state of the system, ``z = [S, I, R]``. Since the input is
        1D, no pre-processing is needed.
    t : float
        Current time. Not used in the equations; it is kept because the
        signature places time as the second argument.
    be : float
        Unknown parameter beta (transmission rate).
    gm : float
        Unknown parameter gamma (recovery rate).

    Returns
    -------
    list
        ``[dS/dt, dI/dt, dR/dt]``. The three entries sum to zero.

    Notes
    -----
    The population size is read from the module-level name ``N``, which must
    be defined before this function is called.
    '''

    S, I, R = z

    # Flow S -> I and flow I -> R; each appears once with each sign, so the
    # total S + I + R is conserved.
    new_infections = be * S * I / N
    recoveries = gm * I

    return [-new_infections, new_infections - recoveries, recoveries]

### Using `PDEparams` to estimate parameters

First, we load the data from the `.csv` file.

Then we build the dataframe with the data we want.

The columns are, in order: $t$, $x$, $y$.

In [33]:
# Susceptibles
S = []
ent = data[4,1]
for a in china:
    #if a == 278:
        #print(0)
     #   ent = a
      #  S.append(0)
    ent = a - ent
    S.append(a)
print(S)

[0, 341, 571, 830, 1297, 1985, 2761, 4537, 5997, 7736, 9720, 11821, 14411, 17238, 20471, 24363, 28060, 31211, 34598, 37251, 40235, 42708, 44730, 46550, 48548, 50054, 51174]


In [18]:
# Print the loaded table as plain text to inspect its columns and rows.
print(data)

   Date of report  Days  Total  China  Death China  Other  Death Outside  \
0        01/20/20    21    282    278            6      4              0   
1        01/21/20    22    346    341            6      4              0   
2        01/23/20    24    581    571           17      7              0   
3        01/24/20    25    846    830           25     11              0   
4        01/25/20    26   1320   1297           41     23              0   
5        01/26/20    27   2014   1985           56     29              0   
6        01/27/20    28   2798   2761           80     37              0   
7        01/28/20    29   4593   4537          106     56              0   
8        01/29/20    30   6065   5997          132     68              0   
9        01/30/20    31   7818   7736          170     82              0   
10       01/31/20    32   9826   9720          213    106              0   
11     02/01/2020    33  11953  11821          259    132              0   
12     02/02

#### Constructing the `PDEmodel` object.

The inputs are

**Required:**
1. The data table `data`.
2. The model `SIR`.
3. The list of initial condition functions.
4. The bounds for the parameter values.

**Optional:**
1. The parameter names.
2. The number of variables: 3 ($S$, $I$, $R$). **(Default is 1, this needs to be provided in this case)**
3. The number of spatial dimensions: 0. **(Default is 1, this needs to be provided in this case)**
4. The number of replicates in the data: 1. **(Default is 1, so this does not need to be provided in this case)**
5. The indices of the measured variables. In this case, the default `None`, since we have data for all 2 variables.
6. The function to apply to the output. In this case, the default `None`, since our data is directly $x$ and $y$.

In [None]:
Primer