In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from datetime import datetime
import statsmodels.api as sm
import itertools
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation notices from the libraries used below.
warnings.filterwarnings("ignore")

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Walk the Kaggle input directory and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
series=pd.read_csv("/kaggle/input/corona-virus-report/covid_19_clean_complete.csv")

In [None]:
cases = pd.read_csv('/kaggle/input/corona-virus-report/day_wise.csv')

In [None]:
cases.head()

In [None]:
cases['New recovered'].plot()

In [None]:
cases.head()

In [None]:
import matplotlib.pyplot as plt

Forecasting the Covid-19 Situation around the World (Generalised Model Globally)

In [None]:
cases.isnull().sum()

In [None]:
cases.shape

In [None]:
recovered = cases[['Date','New recovered']]

In [None]:
# Earliest and latest dates present in the `recovered` frame.
# Both bounds come from the same frame so the pair describes one dataset.
recovered['Date'].min(), recovered['Date'].max()

In [None]:
# Keep only the rows dated strictly after 2020-05-16 and strictly before 2020-07-15.
# The bounds are compared against the 'Date' values exactly as they were loaded.
after_start = recovered['Date'] > '2020-05-16'
before_end = recovered['Date'] < '2020-07-15'
recov = recovered.loc[after_start & before_end].copy()

In [None]:
recov=recov.groupby('Date')['New recovered'].sum().reset_index()

In [None]:
recov.head()

In [None]:
rtoll = recov.copy()

In [None]:
rtoll.head()

In [None]:
rtoll.set_index('Date',inplace=True)

In [None]:
rtoll.index = pd.to_datetime(rtoll.index)

In [None]:
rtoll.head()

In [None]:
from pylab import rcParams
rcParams['figure.figsize'] =5,5
rtoll.plot()

Using the Augmented Dickey-Fuller (ADF) test to check whether the data is stationary

In [None]:
from statsmodels.tsa.stattools import adfuller

In [None]:
test_results = adfuller(rtoll['New recovered'])

In [None]:
# H0: the series is not stationary
# HA: the series is stationary

def adfuller_test(deaths):
    """Run `adfuller` on a series and print a stationarity verdict.

    Prints the first four values returned by `adfuller`, each with its
    label, then prints "Stationary" when the p-value (second returned
    value) is at most 0.05 and "Not Stationary" otherwise.

    Parameters
    ----------
    deaths
        The series to test. The name is historical; the function is applied
        to any series, e.g. the recovered-cases columns in this notebook.

    Returns
    -------
    None
        The results are only printed.
    """
    result = adfuller(deaths)
    labels = (
        'ADF Test Statistic',
        'p-value',
        '#Lags Used',
        'Number of Observations Used',
    )
    # zip stops after the four labelled values; any extra outputs are ignored.
    for label, value in zip(labels, result):
        print(label+':'+ str(value))
    p_value = result[1]
    verdict = "Stationary" if p_value <= 0.05 else "Not Stationary"
    print(verdict)

In [None]:
adfuller_test(rtoll['New recovered'])

As we can see, the p-value is greater than 0.05, so the data is not stationary: we fail to reject the null hypothesis.

Differencing Used To Make Data Stationary

In [None]:
plt.figure(figsize=(40,8))
plt.plot(np.log(rtoll['New recovered']))

In [None]:
# First difference: each value minus the previous row's value (the first row becomes NaN).
rtoll['New recovered Diff'] = rtoll['New recovered'].diff()

In [None]:
adfuller_test(rtoll['New recovered Diff'].dropna())

In [None]:
rtoll.isnull().sum()

In [None]:
rtoll.drop(['New recovered'],axis=1,inplace=True)

**As you can see, the data has become stationary after differencing.**

In [None]:
from pylab import rcParams 
rcParams['figure.figsize'] =18,8
decomposition = sm.tsa.seasonal_decompose(rtoll.dropna(),model='additive')
fig = decomposition.plot()
plt.show()

# **ARIMA MODEL FOR TIME SERIES FORECASTING OF RECOVERED CASES**

In [None]:
rtoll.head()

In [None]:
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf

In [None]:
# Differenced series without the NaN introduced by the shift.
diff_series = rtoll['New recovered Diff'].dropna()
# The PACF can only be computed for lags below half the sample size, so cap
# the requested 40 lags to what this short series supports.
n_lags = max(1, min(40, len(diff_series) // 2 - 1))

fig = plt.figure(figsize=(12,8))
# Top panel: autocorrelation; bottom panel: partial autocorrelation.
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(diff_series, lags=n_lags, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(diff_series, lags=n_lags, ax=ax2)

From the graphs above we can see that p = 1, because the plot cuts off at that lag, and q = 0, because no exponential decay is visible.

In [None]:
from statsmodels.tsa.arima_model import ARIMA

In [None]:
# Candidate values (0 and 1) shared by the three ARIMA order terms p, d and q.
p = d = q = range(2)
# Every (p, d, q) combination, in the same order a nested loop would produce.
pdq = [(p_val, d_val, q_val) for p_val in p for d_val in d for q_val in q]

In [None]:
# Fit one ARIMA model per (p, d, q) candidate and print its AIC so the orders
# can be compared. A candidate that fails to build or fit is reported rather
# than silently skipped, so a search where every fit fails is visible.
for param in pdq:
    try:
        model = ARIMA(rtoll['New recovered Diff'].dropna(), order=param)
        results = model.fit()
    except Exception as exc:
        # Catch Exception (not a bare except) so KeyboardInterrupt still stops the loop.
        print('ARIMA{}- failed: {}: {}'.format(param, type(exc).__name__, exc))
        continue
    print('ARIMA{}- AIC:{}'.format(param,results.aic))

In [None]:
model = ARIMA(rtoll['New recovered Diff'].dropna(),order=(0,0,1))

In [None]:
results=model.fit()

In [None]:
results.summary().tables[1]

In [None]:
rtoll['Forecast']=results.predict(start=pd.to_datetime('2020-07-01'),dynamic=False)

In [None]:
rtoll['Forecast'].tail(5)

In [None]:
# Overlay the model's predictions (the 'Forecast' column) on the observed
# differenced series.
ax = rtoll['New recovered Diff'].plot(label='Observed', figsize=(14,7))
rtoll['Forecast'].plot(ax=ax, label='Predicted', alpha=.7)
ax.set_xlabel('Date')
# The plotted quantity is the differenced recovered-cases series.
ax.set_ylabel('New recovered (daily difference)')
# Show the 'Observed' / 'Predicted' labels set above.
ax.legend()
plt.show()

In [None]:
rtoll.index[-1]

In [None]:
# Forecast the next seven steps after the end of the fitted sample.
forecast_output = results.forecast(steps=7)
# NOTE(review): element 0 is presumably the array of point forecasts from a
# tuple-style return value; confirm against the installed statsmodels version.
forecast = forecast_output[0]

In [None]:
forecast

In [None]:
from pandas.tseries.offsets import DateOffset

# Seven consecutive daily dates starting at the last date in rtoll itself
# (offset 0), i.e. the last existing date followed by the six days after it.
last_observed = rtoll.index[-1]
future_dates = [last_observed + DateOffset(days=day_offset) for day_offset in range(7)]

In [None]:
future_dates_df = pd.DataFrame(index=future_dates[1:],columns=rtoll.columns)

In [None]:
future_df = pd.concat([rtoll,future_dates_df])

In [None]:
future_df.tail(6)

In [None]:
# Predict from 2020-07-01 through the last date in future_df, so the range
# follows the appended future rows instead of a separately hard-coded end date.
future_df['Forecast'] = results.predict(start='2020-07-01', end=future_df.index[-1], dynamic=False)
ax = future_df[['New recovered Diff','Forecast']].plot()
ax.set_xlabel('Date')
ax.set_ylabel('New recovered (daily difference)')
plt.show()