<a id=top></a>

# AM10 - Spring 2020 

## Lecture 10 - NEW

## Exploratory Data Analysis of the COVID-19 Pandemic

## The Forced SIR model

#### &copy; 2020 Eleni Angelaki Kaxiras, Efthimios Kaxiras

<BR>

In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
#matplotlib.style.use('ggplot')
%matplotlib inline

In [2]:
import scipy.stats as st
from scipy.optimize import curve_fit
from scipy.stats import norm

In [3]:
import pandas as pd
import seaborn as sns
import warnings
import glob
# Raise pandas' display limits so wide/long frames are printed in full
# instead of being abbreviated with "...".
for _option, _value in (
    ("display.max_columns", 500),
    ("display.max_rows", 3000),
    ("display.max_colwidth", 200),
    ("display.precision", 3),
):
    pd.set_option(_option, _value)

### Import Daily Data from:
https://ourworldindata.org/coronavirus-source-data 
(Sourced from the European Centre for Disease Prevention and Control, ECDC)

In [4]:
# URL of the ECDC daily case-distribution CSV (one row per country per day).
data = 'https://opendata.ecdc.europa.eu/covid19/casedistribution/csv'
# index_col=False: keep pandas' default integer index rather than using the
# first CSV column as the index.
daily_coronadata = pd.read_csv(data, index_col=False)
# Preview only the first rows; with display.max_rows raised above, a bare
# `daily_coronadata` would render a very long table.
daily_coronadata.head(10)

Unnamed: 0,dateRep,day,month,year,cases,deaths,countriesAndTerritories,geoId,countryterritoryCode,popData2018
0,10/04/2020,10,4,2020,61,1,Afghanistan,AF,AFG,3.717e+07
1,09/04/2020,9,4,2020,56,3,Afghanistan,AF,AFG,3.717e+07
2,08/04/2020,8,4,2020,30,4,Afghanistan,AF,AFG,3.717e+07
3,07/04/2020,7,4,2020,38,0,Afghanistan,AF,AFG,3.717e+07
4,06/04/2020,6,4,2020,29,2,Afghanistan,AF,AFG,3.717e+07
5,05/04/2020,5,4,2020,35,1,Afghanistan,AF,AFG,3.717e+07
6,04/04/2020,4,4,2020,0,0,Afghanistan,AF,AFG,3.717e+07
7,03/04/2020,3,4,2020,43,0,Afghanistan,AF,AFG,3.717e+07
8,02/04/2020,2,4,2020,26,0,Afghanistan,AF,AFG,3.717e+07
9,01/04/2020,1,4,2020,25,0,Afghanistan,AF,AFG,3.717e+07


In [5]:
#
# Look up the 3-letter code of a country: select the code column for the
# rows of that country and take the first entry.
#
country = 'China'
daily_coronadata.loc[
    daily_coronadata['countriesAndTerritories'] == country,
    'countryterritoryCode',
].iloc[0]

'CHN'

In [6]:
#
#  Preview the rows reported for one country (first five only)
#
country = 'United_States_of_America'
daily_coronadata.loc[daily_coronadata['countriesAndTerritories'].eq(country)].head()

Unnamed: 0,dateRep,day,month,year,cases,deaths,countriesAndTerritories,geoId,countryterritoryCode,popData2018
9580,10/04/2020,10,4,2020,33901,1873,United_States_of_America,US,USA,327200000.0
9581,09/04/2020,9,4,2020,33323,1922,United_States_of_America,US,USA,327200000.0
9582,08/04/2020,8,4,2020,30613,1906,United_States_of_America,US,USA,327200000.0
9583,07/04/2020,7,4,2020,30561,1342,United_States_of_America,US,USA,327200000.0
9584,06/04/2020,6,4,2020,25398,1146,United_States_of_America,US,USA,327200000.0


In [7]:
#
#  Population of a country: every row of a country carries the same
#  popData2018 value, so the first matching row is enough.
#
country = 'United_States_of_America'
popul_USA = daily_coronadata.loc[
    daily_coronadata['countriesAndTerritories'] == country,
    'popData2018',
].iloc[0]
print(' Population U.S.A. %11i' % popul_USA)

 Population U.S.A.   327167434


In [None]:
#
# Calculate t0, the day the exponential increase started
# (CHN = 17)
# (USA = 59)
#

In [None]:
country = 'China'
# Filter the table once and reuse the selection for both quantities.
_chn_rows = daily_coronadata.loc[daily_coronadata['countriesAndTerritories'] == country]
popul_CHN = _chn_rows['popData2018'].iloc[0]
# The file lists the most recent day first; reverse so the series runs
# forward in time.
daily_all_CHN = _chn_rows['cases'].to_numpy()[::-1]

In [None]:
# Keep the complete series (no leading days are dropped); raise the start
# index of the slice to begin the analysis at a later day.
daily_CHN = daily_all_CHN[:]
daily_CHN

In [None]:
# daily_USA is not defined anywhere else in the notebook, so build it here the
# same way as the CHN series: daily case counts for the country, reversed so
# that the array runs forward in time (the file lists the newest day first).
daily_USA = daily_coronadata.loc[
    daily_coronadata['countriesAndTerritories'] == 'United_States_of_America',
    'cases',
].to_numpy()[::-1]

# Overlay the daily case counts of the two countries and save the figure.
fig=plt.figure(figsize=(6,5))
plt.plot(daily_CHN,label='CHN')
plt.plot(daily_USA,label='USA')
plt.xlabel('Day since $t_0$')
plt.ylabel('Daily cases')
plt.legend()
fig.savefig('Total_10.png',format='png')

In [None]:
def simulate_sir(N, beta, gamma, t, I0=1):
    """Integrate the SIR equations with the forward-Euler method.

    dS/dt = -beta*S*I/N,  dI/dt = beta*S*I/N - gamma*I,  dR/dt = gamma*I

    Parameters
    ----------
    N : total population (S + I + R stays equal to N).
    beta : contact rate (1/days).
    gamma : mean recovery rate (1/days).
    t : 1-D array of equally spaced times; the step is taken as t[1]-t[0].
    I0 : number of infected individuals at t[0].

    Returns
    -------
    (S, I, R) : three float arrays with the same length as ``t``.

    Raises
    ------
    ValueError : if ``t`` is empty.
    """
    t = np.asarray(t, dtype=float)
    nsteps = len(t)
    if nsteps == 0:
        raise ValueError("t must contain at least one time point")

    S = np.zeros(nsteps)
    I = np.zeros(nsteps)
    R = np.zeros(nsteps)

    # The infected individuals are part of the population, so the
    # susceptibles start at N - I0 (not N); this keeps S + I + R == N.
    S[0] = N - I0
    I[0] = I0
    R[0] = 0

    if nsteps == 1:
        return S, I, R

    dt = t[1] - t[0]
    for n in range(nsteps - 1):
        new_infections = beta * S[n] * I[n] / N
        new_recoveries = gamma * I[n]
        S[n+1] = S[n] - dt * new_infections
        I[n+1] = I[n] + dt * (new_infections - new_recoveries)
        R[n+1] = R[n] + dt * new_recoveries
    return S, I, R


# Total population (N) and number of days (Ndays) of epidemic duration
N = 1000
Ndays=100

# Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
beta = 0.3
gamma = 1.0/10

# Generating the time array (Ndays points from 0 to Ndays, so the step is
# Ndays/(Ndays-1), slightly more than one day):
t = np.linspace(0, Ndays, Ndays)
dt=t[1]-t[0]

# Solving the system of differential equations,
# starting with one infected individual
S, I, R = simulate_sir(N, beta, gamma, t)
    
# Plot the three simulated compartments as markers on a single axes.
fig, ax = plt.subplots(figsize=(6,5))
for _series, _label in ((S, 'Susceptible'), (I, 'Infectious'), (R, 'Recovered')):
    ax.plot(t, _series, 'o', ms=4, label=_label)
ax.set_xlabel('Time (days)')
ax.set_ylabel('Number')
# Leave 5% headroom above the total population.
ax.set_ylim(0, 1.05*N)
ax.legend()
plt.show()

In [None]:
# The sigmoid function (a version of the theta-function)
def sigmoid(a, t0, t):
    """Logistic step 1/(1+exp(-a*(t-t0))).

    Rises smoothly from 0 (t << t0) to 1 (t >> t0) and equals 1/2 at t = t0.

    Parameters
    ----------
    a : steepness of the step (1/time); larger values give a sharper step.
    t0 : time at which the step is half-way.
    t : scalar or array of times.

    Returns
    -------
    Value(s) of the sigmoid at ``t`` (same shape as ``t``).
    """
    return 1.0/(1.0 + np.exp(-a*(np.asarray(t, dtype=float) - t0)))


In [None]:
# Fit to model: describe S(t) and R(t) with sigmoids and compare them with
# the simulated curves; I(t) follows as N - S - R.

# parameters for beta=0.3, gamma=1/10
a1, a2 = 0.165, 0.135
N2 = N - 55
t1 = 35.5
# NOTE: from here on `dt` is the offset between the two sigmoids
# (t2 - t1), no longer the time step of the simulation.
dt = 9.0
t2 = t1 + dt

Smod = N - N2*sigmoid(a1, t1, t)
Rmod = N2*sigmoid(a2, t2, t)

fig, ax = plt.subplots(figsize=(6,5))

# simulated compartments (markers)
for _series, _label in ((S, 'Susceptible'), (I, 'Infectious'), (R, 'Recovered')):
    ax.plot(t, _series, 'o', ms=4, label=_label)

# sigmoid model (lines); the last curve is the modelled I(t)
for _curve, _fmt in ((Smod, 'r-'), (Rmod, 'k-'), (N - Smod - Rmod, 'b-')):
    ax.plot(t, _curve, _fmt)

ax.set_xlabel('Time (days)')
ax.set_ylabel('Number')
ax.set_ylim(0, 1.05*N)
ax.legend()
plt.show()
fig.savefig('FSIR_model.png', format='png')

In [None]:
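# TODO: define here the model function daily_fit(t, t1, dt, dn) for I(t) - a difference of 2 sigmoids.
# The cells below pass it to curve_fit and call it as daily_fit(xdat, t1, dt, dn), so they fail until it is defined.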

In [None]:
# CHN daily data and parameters

fdat = daily_CHN
xdat = np.arange(len(fdat))

# Fit the model to the daily counts; no starting guess is supplied, so
# curve_fit uses its default one.
popt, pcov = curve_fit(daily_fit, xdat, fdat)
print(popt)
t1, dt, dn = popt[0], popt[1], popt[2]

# Model evaluated on the days covered by the data.
ffit = daily_fit(xdat, t1, dt, dn)

fig, ax = plt.subplots(figsize=(10,5))
ax.scatter(xdat, fdat)
ax.plot(xdat, ffit, 'g')
ax.set_title('CHN: 4/10/2020')
ax.set_ylabel('Daily cases')
ax.set_xlabel('$t-t_0$ (days)')
plt.show()
fig.savefig('Daily_CHN.png', format='png')

In [None]:
# CHN CUMULATIVE data and extension (PREDICTION)

# Running total of the reported daily cases.
cumm_dat = np.cumsum(fdat, dtype=float)

# Evaluate the fitted model T_ext days beyond the last data point.
T_ext = 10
xdat_ext = np.arange(len(fdat) + T_ext)
ffit_ext = daily_fit(xdat_ext, t1, dt, dn)

# Running total of the model: the first term comes from the in-sample fit,
# all later terms from the extended evaluation.
cumm_fit = np.cumsum(np.concatenate(([ffit[0]], ffit_ext[1:])), dtype=float)

fig, ax = plt.subplots(figsize=(10,5))
ax.scatter(xdat, cumm_dat)
ax.plot(xdat_ext, cumm_fit, 'g')
ax.set_title('CHN: 4/10/2020')
ax.set_ylabel('Total cases')
ax.set_xlabel('$t-t_0$ (days)')
#ax.legend()
plt.show()
fig.savefig('Total_CHN.png', format='png')

In [None]:
# Final value of the modelled cumulative count and the 99% threshold.
Nt = cumm_fit[-1]
Ncut = 0.99*Nt

# First day on which the modelled total exceeds the threshold.  Search
# explicitly so that a failed search is reported instead of leaving `imax`
# undefined (or stale from an earlier run of this cell).
_above = np.flatnonzero(np.asarray(cumm_fit) > Ncut)
if _above.size == 0:
    raise ValueError(
        "the cumulative fit never exceeds 99% of its final value "
        "(final value is not positive or not finite)"
    )
imax = int(_above[0])

# Final total, and the threshold day counted from the end of the data
# (negative: already reached within the observed period).
print(Nt, imax-len(xdat))