In [32]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import RobustScaler
from sklearn.pipeline import Pipeline
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA

In [2]:
# Download the current OxCGRT policy-tracker export from GitHub.
# low_memory=False stops pandas from parsing the file in chunks, which avoids
# mixed-dtype inference on this wide table.
data = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv',
    low_memory=False,
)

In [3]:
# Keep only the rows whose RegionName is exactly 'England'.
england = data.loc[data['RegionName'].eq('England')]

In [4]:
# Display the filtered frame as the cell output to eyeball the England rows.
england

Unnamed: 0,CountryName,CountryCode,RegionName,RegionCode,Jurisdiction,Date,C1_School closing,C1_Flag,C2_Workplace closing,C2_Flag,...,StringencyIndex,StringencyIndexForDisplay,StringencyLegacyIndex,StringencyLegacyIndexForDisplay,GovernmentResponseIndex,GovernmentResponseIndexForDisplay,ContainmentHealthIndex,ContainmentHealthIndexForDisplay,EconomicSupportIndex,EconomicSupportIndexForDisplay
39474,United Kingdom,GBR,England,UK_ENG,STATE_TOTAL,20200101,0.0,,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
39475,United Kingdom,GBR,England,UK_ENG,STATE_TOTAL,20200102,0.0,,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
39476,United Kingdom,GBR,England,UK_ENG,STATE_TOTAL,20200103,0.0,,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
39477,United Kingdom,GBR,England,UK_ENG,STATE_TOTAL,20200104,0.0,,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
39478,United Kingdom,GBR,England,UK_ENG,STATE_TOTAL,20200105,0.0,,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39856,United Kingdom,GBR,England,UK_ENG,STATE_TOTAL,20210117,,,,,...,,,,,,,,,,
39857,United Kingdom,GBR,England,UK_ENG,STATE_TOTAL,20210118,,,,,...,,,,,,,,,,
39858,United Kingdom,GBR,England,UK_ENG,STATE_TOTAL,20210119,,,,,...,,,,,,,,,,
39859,United Kingdom,GBR,England,UK_ENG,STATE_TOTAL,20210120,,,,,...,,,,,,,,,,


In [6]:
# The list-style column selection keeps each result a one-column DataFrame
# (2-D) rather than a Series.
X = england.loc[:, ['StringencyIndex']]   # later differenced and passed as exog
y = england.loc[:, ['ConfirmedDeaths']]   # later differenced and passed as endog

In [33]:
# Preprocessing chain: fill missing values with SimpleImputer's defaults,
# then rescale with RobustScaler's defaults.
pipe = Pipeline(steps=[
    ('imputer', SimpleImputer()),
    ('scaler', RobustScaler()),
])

In [34]:
# Impute and scale each series with the shared pipeline. fit_transform re-fits
# `pipe` on every call, so X is processed first and `pipe` ends up holding the
# statistics learned from y.
X, y = pipe.fit_transform(X), pipe.fit_transform(y)

In [35]:
def difference(dataset, interval):
    """Return the lagged differences of ``dataset`` as a list.

    Entry ``k`` of the result equals ``dataset[k + interval] - dataset[k]``,
    so the list has ``len(dataset) - interval`` entries, and is empty when
    ``interval`` is at least ``len(dataset)``.

    Parameters
    ----------
    dataset : sequence
        Any object supporting ``len()`` and integer indexing whose items can
        be subtracted from one another.
    interval : int
        Distance between the two items that are subtracted.

    Returns
    -------
    list
        The differences, in the order of the input.
    """
    return [
        dataset[pos] - dataset[pos - interval]
        for pos in range(interval, len(dataset))
    ]

In [36]:
# Build the lag-1 differences before inspecting them, so this cell also runs
# on a freshly restarted kernel (previously the shape check came before the
# cell that defined y_diff).
X_diff = np.array(difference(X, 1))
y_diff = np.array(difference(y, 1))
y_diff.shape

(386, 1)

In [38]:
# ARIMA(1, 1, 1) on the differenced target, with the differenced stringency
# series as the exogenous regressor.
# NOTE(review): y_diff is already a lag-1 difference and d=1 differences it
# again inside the model; confirm the double differencing is intended.
arima = ARIMA(y_diff, exog=X_diff, order=(1, 1, 1))

In [39]:
# Estimate the model parameters; `model` is the fitted results object used by
# the summary and prediction cells.
model = arima.fit()

In [40]:
# Show the coefficient table and diagnostic statistics of the fitted model.
model.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,386.0
Model:,"ARIMA(1, 1, 1)",Log Likelihood,-457.377
Date:,"Thu, 21 Jan 2021",AIC,922.754
Time:,11:59:32,BIC,938.567
Sample:,0,HQIC,929.026
,- 386,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
x1,-0.0264,0.729,-0.036,0.971,-1.454,1.402
ar.L1,-0.0162,0.924,-0.018,0.986,-1.827,1.795
ma.L1,-1.0000,12.763,-0.078,0.938,-26.014,24.014
sigma2,0.6204,7.910,0.078,0.937,-14.883,16.123

0,1,2,3
Ljung-Box (L1) (Q):,0.0,Jarque-Bera (JB):,614709.71
Prob(Q):,0.95,Prob(JB):,0.0
Heteroskedasticity (H):,1.61,Skew:,-13.81
Prob(H) (two-sided):,0.01,Kurtosis:,196.8


In [43]:
# In-sample predictions for every fitted observation. The end index is derived
# from the data rather than pinned to 385, because the source is a "latest"
# export whose row count may differ between downloads.
model.predict(start=0, end=len(y_diff) - 1)

array([ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00, -1.21752068e-02,  4.44891167e-04,
        6.19596101e-04,  5.90046716e-04,  5.63187533e-04,  5.38667169e-04,
        5.16192883e-04,  4.95518832e-04,  4.76437044e-04,  4.58770390e-04,
        4.42367076e-04,  4.27096269e-04,  4.12844597e-04, -1.17537956e-02,
        5.70554701e-04,  7.49879139e-04,  7.27803191e-04,  7.06989873e-04,
        6.87333875e-04,  6.68741278e-04,  6.51128059e-04,  6.34418821e-04,
        6.18545709e-04,  6.03447498e-04,  5.89068794e-04,  5.75359363e-04,
        5.62273539e-04,  5.49769719e-04,  5.37809919e-04,  5.26359393e-04,
        5.15386288e-04,  5.04861356e-04,  4.94757690e-04,  4.85050493e-04,
        4.75716878e-04,  