In [4]:
import pandas as pd
import requests
import json
import numpy as np
import matplotlib.pyplot as plt

# Select the interactive 'notebook' matplotlib backend for every figure below.
# NOTE(review): this backend may be unavailable on newer front ends; `%matplotlib widget`
# (ipympl) is the usual replacement — confirm against the target environment.
%matplotlib notebook
# Cap rendered DataFrames at 10 columns and 500 rows.
pd.set_option('display.max_columns', 10)
pd.set_option('display.max_rows', 500)


# Endpoint that serves the JSON document holding the 'cases_time_series' records.
url = "https://api.covid19india.org/data.json"

# A timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() surfaces HTTP errors here rather than as a confusing
# JSON/KeyError further down.
r = requests.get(url, timeout = 30)
r.raise_for_status()

# Only the 'cases_time_series' list of records is used by this notebook.
data = r.json()['cases_time_series']

# Keep just the date and the daily confirmed count. The values are cast to int64,
# and only on the column that is actually retained. The chain builds a new frame
# instead of renaming a column selection in place.
columns_to_keep = ['dateymd', 'dailyconfirmed']
df = (
    pd.DataFrame(data)[columns_to_keep]
    .astype({'dailyconfirmed': 'int64'})
    .rename(columns = {'dateymd': 'Date', 'dailyconfirmed': 'Confirmed'})
)

In [5]:
# Parse the 'Date' strings and use them as the index.
# - `infer_datetime_format` is omitted: it is deprecated in pandas 2.x and the
#   parser already infers the format by default.
# - The guard makes the cell safe to re-run: once 'Date' is the index it is no
#   longer a column, and the unguarded version raised a KeyError.
if 'Date' in df.columns:
    df = df.assign(Date = pd.to_datetime(df['Date'])).set_index('Date')

In [6]:
# Raw daily confirmed counts, labelled so the figure can be read on its own.
ax = df['Confirmed'].plot(title = 'Daily confirmed cases')
ax.set_xlabel('Date')
ax.set_ylabel('Confirmed');  # trailing ';' hides the text repr of the return value

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1b3c0aad390>

In [7]:
from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries, window = 7):
    """Plot rolling statistics of a series and print its Augmented Dickey-Fuller test.

    Parameters
    ----------
    timeseries : pandas.Series
        Series to inspect; it must support ``.rolling`` and ``.dropna``.
    window : int, optional
        Number of observations in the rolling mean / standard-deviation window.
        Defaults to 7, the value that used to be hard-coded.

    Returns
    -------
    None
        The figure is shown and the test summary is printed.
    """
    # Rolling statistics over the requested window.
    rolmean = timeseries.rolling(window = window).mean()
    rolstd = timeseries.rolling(window = window).std()

    # Original series with its rolling mean and standard deviation on one axes.
    fig, ax = plt.subplots()
    ax.plot(timeseries, color = 'blue', label = 'Original')
    ax.plot(rolmean, color = 'red', label = 'Rolling Mean')
    ax.plot(rolstd, color = 'black', label = 'Rolling Std')
    ax.legend(loc = 'best')
    ax.set_title('Rolling  Mean & Standard Deviation')
    plt.show()

    # Dickey-Fuller test. Missing values are dropped first so a series with
    # NaNs can still be tested.
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries.dropna(), autolag = 'AIC')
    dfoutput = pd.Series(dftest[0:4], index = ['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])
    # dftest[4] maps each significance level to its critical value.
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)

In [None]:
from statsmodels.tsa.stattools import kpss

def kpss_test(timeseries):
    """Print the results of a KPSS test (constant-only regression) for a series.

    Parameters
    ----------
    timeseries : array-like
        Observations passed straight to ``statsmodels.tsa.stattools.kpss``.

    Returns
    -------
    None
        The statistic, p-value, lag count and critical values are printed.
    """
    print ('Results of KPSS Test:')

    # The lag-selection keyword is `nlags` in current statsmodels; the former
    # `lags` spelling is no longer accepted and raises a TypeError.
    kpsstest = kpss(timeseries, regression='c', nlags = 'auto')
    kpss_output = pd.Series(kpsstest[0:3], index=['Test Statistic','p-value','Lags Used'])

    # kpsstest[3] maps each significance level to its critical value.
    for key, value in kpsstest[3].items():
        kpss_output['Critical Value (%s)' % key] = value
    print(kpss_output)

In [8]:
# Build a separate frame so `df` keeps the raw counts.
# Zero counts are replaced by 1 first, which makes their log10 equal to 0
# instead of -inf.
log10_df = df.assign(Confirmed = df['Confirmed'].replace(0, 1))
log10_df = log10_df.assign(Log = np.log10(log10_df['Confirmed']))

In [9]:
# Rolling statistics and Dickey-Fuller test on the log10-transformed counts.
test_stationarity(log10_df['Log'])

<IPython.core.display.Javascript object>

Results of Dickey-Fuller Test:
Test Statistic                  -3.263895
p-value                          0.016572
#Lags Used                      14.000000
Number of Observations Used    276.000000
Critical Value (1%)             -3.454267
Critical Value (5%)             -2.872070
Critical Value (10%)            -2.572381
dtype: float64


In [None]:
# KPSS test on the log10-transformed counts.
kpss_test(log10_df['Log'])

In [None]:
# Natural-log variant, built as a separate frame so `df` keeps the raw counts.
# Zero counts are replaced by 1 first, which makes their log equal to 0
# instead of -inf.
log_df = df.assign(Confirmed = df['Confirmed'].replace(0, 1))
log_df = log_df.assign(Log = np.log(log_df['Confirmed']))

In [None]:
# Rolling statistics and Dickey-Fuller test on the natural-log-transformed counts.
test_stationarity(log_df['Log'])

In [None]:
# KPSS test on the natural-log-transformed counts.
kpss_test(log_df['Log'])

In [None]:
# df['Cube_Root'] = np.cbrt(df['Confirmed'])
# test_stationarity(df['Cube_Root'])

# df['Fourth_Root'] = np.power(df['Confirmed'], (1./4))
# test_stationarity(df['Fourth_Root'])

# df['Fifth_Root'] = np.power(df['Confirmed'], (1./5))
# test_stationarity(df['Fifth_Root'])

# df['Sixth_Root'] = np.power(df['Confirmed'], (1./6))
# test_stationarity(df['Sixth_Root'])

# df['Seventh_Root'] = np.power(df['Confirmed'], (1./7))
# test_stationarity(df['Seventh_Root'])

# df['Eighth_Root'] = np.power(df['Confirmed'], (1./8))
# test_stationarity(df['Eighth_Root'])

In [None]:
from pandas.plotting import autocorrelation_plot

In [None]:
# Autocorrelation of the log10 series, drawn on a fresh figure.
plt.figure()
autocorrelation_plot(series = log10_df['Log'])
plt.show()

In [31]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [32]:
# ACF (top) and PACF (bottom) of the log10 series for the first 40 lags.
fig, (ax1, ax2) = plt.subplots(2, 1)

plot_acf(log10_df['Log'], lags = 40, ax = ax1)
plot_pacf(log10_df['Log'], lags = 40, ax = ax2)

# tight_layout() keeps the two stacked axes from overlapping. Ending the cell on
# it (it returns None) also stops the value returned by plot_pacf from being
# echoed as a second copy of the figure.
fig.tight_layout()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [33]:
from statsmodels.tsa.arima_model import ARIMA

In [34]:
# Fit an ARIMA model of order (1, 1, 1) on the log10-transformed counts.
# NOTE(review): `ARIMA` is imported from `statsmodels.tsa.arima_model`, a legacy
# module that newer statsmodels releases no longer ship. Its replacement,
# `statsmodels.tsa.arima.model.ARIMA`, returns forecasts in a different shape, so
# the cells that index into `model_fit.forecast()` would have to be ported together
# with this one — TODO confirm against the installed statsmodels version.
model = ARIMA(log10_df['Log'], order=(1, 1, 1))
model_fit = model.fit()



In [35]:
# Fit statistics and coefficient table for the model currently bound to `model_fit`.
model_fit.summary()

0,1,2,3
Dep. Variable:,D.Log,No. Observations:,290.0
Model:,"ARIMA(1, 1, 1)",Log Likelihood,214.696
Method:,css-mle,S.D. of innovations,0.115
Date:,"Mon, 16 Nov 2020",AIC,-421.392
Time:,22:11:00,BIC,-406.712
Sample:,01-31-2020,HQIC,-415.51
,- 11-15-2020,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0158,0.004,4.055,0.000,0.008,0.023
ar.L1.D.Log,-0.0513,0.110,-0.465,0.642,-0.267,0.165
ma.L1.D.Log,-0.3982,0.096,-4.160,0.000,-0.586,-0.211

0,1,2,3,4
,Real,Imaginary,Modulus,Frequency
AR.1,-19.5054,+0.0000j,19.5054,0.5000
MA.1,2.5114,+0.0000j,2.5114,0.0000


In [36]:
# Next-step forecast, mapped from the log10 scale back to a whole case count.
# NOTE(review): assumes element [0] of the forecast result is the array of point
# forecasts — confirm against the statsmodels version in use.
int(10 ** model_fit.forecast(steps = 1)[0][0])

38013

In [37]:
# Presumably the lower bound of the next-step forecast interval, mapped back from
# the log10 scale (element [2] of the forecast result, first column) — TODO confirm.
int(10 ** model_fit.forecast(steps = 1)[2][0][0])

22585

In [38]:
# Presumably the upper bound of the next-step forecast interval, mapped back from
# the log10 scale (element [2] of the forecast result, second column) — TODO confirm.
int(10 ** model_fit.forecast(steps = 1)[2][0][1])

63981

In [10]:
from statsmodels.tsa.seasonal import seasonal_decompose
# Split the log10 series into trend, seasonal and residual components.
decomposition = seasonal_decompose(log10_df['Log'])
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

# One stacked panel per series, top to bottom in this order. Driving the panels
# from a list replaces four copy-pasted subplot stanzas.
panels = [
    (log10_df['Log'], 'Original'),
    (trend, 'Trend'),
    (seasonal, 'Seasonality'),
    (residual, 'Residual'),
]
fig, axes = plt.subplots(len(panels), 1, sharex = True)
for panel_ax, (component, label) in zip(axes, panels):
    panel_ax.plot(component, label = label)
    panel_ax.legend(loc = 'best')

# Keeps the stacked panels from overlapping; returns None, so the cell prints no repr.
fig.tight_layout()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1b3d947b198>

In [None]:
# Residuals of the fit currently bound to `model_fit`.
# NOTE(review): `model_fit` is assigned in more than one cell of this notebook, so
# which model these residuals belong to depends on execution order.
resi = model_fit.resid

In [None]:
# Summary statistics of the residuals.
resi.describe()

In [None]:
# Draw the residuals on the axes of a newly created figure.
residual_axes = plt.figure().gca()
resi.plot(ax = residual_axes)

In [None]:
# Rolling statistics and Dickey-Fuller test on the model residuals.
test_stationarity(resi)

In [39]:
import statsmodels.api as sm

In [40]:
# SARIMAX on the log10 series: order (1, 1, 1), seasonal order (1, 1, 1, 7).
# This rebinds `model`, which an earlier cell also assigns.
model = sm.tsa.statespace.SARIMAX(
    endog = log10_df['Log'],
    order = (1, 1, 1),
    seasonal_order = (1, 1, 1, 7),
)



In [41]:
# Fit the model currently bound to `model`; this rebinds `model_fit`, which an
# earlier cell also assigns.
model_fit = model.fit()

In [48]:
# Forecast the next two steps (values are on the log10 scale).
model_fit.forecast(steps = 2)

2020-11-16    4.481757
2020-11-17    4.543184
Freq: D, dtype: float64

In [43]:
# Next-step forecast, mapped from the log10 scale back to a whole case count.
# The forecast is a date-indexed Series, so the first element is taken by position
# with .iloc; plain `[0]` on a non-integer index is deprecated in pandas.
int(10 ** model_fit.forecast().iloc[0])

30321

In [44]:
# Predictions from the fitted model with default arguments (log10 scale, since the
# model was fitted on the 'Log' column).
yhat = model_fit.predict()

In [45]:
# Observed log10 series against the model predictions, with a legend so the two
# lines can be told apart.
fit_fig, fit_ax = plt.subplots()
fit_ax.plot(log10_df['Log'], label = 'Observed (Log)')
fit_ax.plot(yhat, label = 'Predicted (yhat)')
fit_ax.set_title('Observed vs. predicted')
fit_ax.legend(loc = 'best');  # trailing ';' hides the text repr of the return value

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1b3d4d88a20>]

In [30]:
# Draw the fitted model's residuals on the axes of a newly created figure.
residual_fig = plt.figure()
model_fit.resid.plot(ax = residual_fig.gca())

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1b3dd9fcdd8>