In [2]:
import os
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import adfuller
from pmdarima import auto_arima
from statsmodels.tsa.arima.model import ARIMA as arima
from sklearn.metrics import mean_squared_error as mse

In [24]:
df = pd.read_csv('main.csv')

In [25]:
df.head()

Unnamed: 0,Date,confirmed,recovered,deaths
0,2020-01-22,0,0,0
1,2020-01-23,0,0,0
2,2020-01-24,0,0,0
3,2020-01-25,0,0,0
4,2020-01-26,0,0,0


In [27]:
df['Date'] = pd.to_datetime(df['Date'])

In [28]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 817 entries, 0 to 816
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       817 non-null    datetime64[ns]
 1   confirmed  817 non-null    int64         
 2   recovered  817 non-null    int64         
 3   deaths     817 non-null    int64         
dtypes: datetime64[ns](1), int64(3)
memory usage: 25.7 KB


In [29]:
res1 = adfuller(df['confirmed'])
res2 = adfuller(df['recovered'])
res3 = adfuller(df['deaths'])

print(res1[1], res2[1], res3[1])

0.05296813241159319 0.0 0.29992748020926774


In [30]:
## they are stationary

In [31]:
model_auto_confirmed = auto_arima(df['confirmed'], trace=True, suppress_warnings=True)
model_auto_recovered = auto_arima(df['recovered'], trace=True, suppress_warnings=True)
model_auto_deceased = auto_arima(df['deaths'], trace=True, suppress_warnings=True)


Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=16942.969, Time=0.35 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=17057.703, Time=0.02 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=17005.001, Time=0.03 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=17000.262, Time=0.05 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=17055.704, Time=0.02 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=16965.946, Time=0.26 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=16989.804, Time=0.31 sec
 ARIMA(3,1,2)(0,0,0)[0] intercept   : AIC=16858.387, Time=0.69 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AIC=16933.925, Time=0.36 sec
 ARIMA(4,1,2)(0,0,0)[0] intercept   : AIC=16927.077, Time=0.78 sec
 ARIMA(3,1,3)(0,0,0)[0] intercept   : AIC=16830.775, Time=1.68 sec
 ARIMA(2,1,3)(0,0,0)[0] intercept   : AIC=16925.194, Time=0.71 sec
 ARIMA(4,1,3)(0,0,0)[0] intercept   : AIC=16823.826, Time=1.49 sec
 ARIMA(5,1,3)(0,0,0)[0] intercept   : AIC=16828.623, Time=1.68 sec
 ARIMA(4,1,4)(0,0,0

In [32]:
len(df)

817

In [33]:
.8*len(df)

653.6

In [34]:
split_point = 654

In [35]:
df.set_index('Date', inplace=True)
df.head()

Unnamed: 0_level_0,confirmed,recovered,deaths
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-22,0,0,0
2020-01-23,0,0,0
2020-01-24,0,0,0
2020-01-25,0,0,0
2020-01-26,0,0,0


In [36]:
y_train = df[:split_point]
y_test = df[split_point:]

In [37]:
len(y_test)

163

In [38]:
## FOR CONFIRMED CASES

# model training
model_confirmed = arima(y_train['confirmed'], order=(2,0,2))
res_confirmed = model_confirmed.fit()

# predictions
y_pred_confirmed = res_confirmed.predict()

# forecasting 
y_forecast_confirmed = res_confirmed.forecast(steps=163)

# model evaluation
np.sqrt(mse(y_forecast_confirmed, y_test['confirmed']))



85124.25141374518

In [39]:
## FOR DECEASED CASES

# model training
model_deceased = arima(y_train['deaths'], order=(0,1,1))
res_deceased = model_deceased.fit()

# predictions
y_pred_deceased = res_deceased.predict()

# forecasting 
y_forecast_deceased = res_deceased.forecast(steps=163)

# model evaluation
np.sqrt(mse(y_forecast_deceased, y_test['deaths']))



462.69110615566603

In [40]:
## FOR RECOVERED CASES

# model training
model_recovered = arima(y_train['recovered'], order=(0,1,0))
res_recovered = model_recovered.fit()

# predictions
y_pred_recovered = res_recovered.predict()

# forecasting 
y_forecast_recovered = res_recovered.forecast(steps=163)

# model evaluation
np.sqrt(mse(y_forecast_recovered, y_test['recovered']))



0.0

In [41]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 817 entries, 2020-01-22 to 2022-04-17
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   confirmed  817 non-null    int64
 1   recovered  817 non-null    int64
 2   deaths     817 non-null    int64
dtypes: int64(3)
memory usage: 25.5 KB


In [42]:
from datetime import date
today = date.today()
today

datetime.date(2022, 4, 18)

In [43]:
import datetime
datetime.datetime.now() + datetime.timedelta(days=7)

datetime.datetime(2022, 4, 25, 20, 40, 18, 413955)

In [46]:
dates = []
for i in range(0, 7):
    dates.append(today + datetime.timedelta(days=i))
type(dates[0])

datetime.date

In [54]:
model_confirmed = arima(df['confirmed'], order=(2,1,2))
res_confirmed = model_confirmed.fit()

model_deceased = arima(df['deaths'], order=(0,1,1))
res_deceased = model_deceased.fit()

model_recovered = arima(df['recovered'], order=(0,1,0))
res_recovered = model_recovered.fit()



In [48]:
dates = pd.to_datetime(dates)
dates

DatetimeIndex(['2022-04-18', '2022-04-19', '2022-04-20', '2022-04-21',
               '2022-04-22', '2022-04-23', '2022-04-24'],
              dtype='datetime64[ns]', freq=None)

In [55]:
y_forecast_confirmed_next_7_days = res_confirmed.forecast(steps=7)
y_forecast_deceased_next_7_days = res_deceased.forecast(steps=7)
y_forecast_recovered_next_7_days = res_recovered.forecast(steps=7)

In [59]:
y_forecast_confirmed_next_7_days = y_forecast_confirmed_next_7_days.astype(np.int64)
y_forecast_confirmed_next_7_days

2022-04-18    2407
2022-04-19    2448
2022-04-20    2614
2022-04-21    2675
2022-04-22    2804
2022-04-23    2871
2022-04-24    2975
Freq: D, Name: predicted_mean, dtype: int64

In [60]:
y_forecast_deceased_next_7_days = y_forecast_deceased_next_7_days.astype(np.int64)
y_forecast_deceased_next_7_days

2022-04-18    79
2022-04-19    79
2022-04-20    79
2022-04-21    79
2022-04-22    79
2022-04-23    79
2022-04-24    79
Freq: D, Name: predicted_mean, dtype: int64

In [61]:
y_forecast_recovered_next_7_days = y_forecast_recovered_next_7_days.astype(np.int64)
y_forecast_recovered_next_7_days

2022-04-18    0
2022-04-19    0
2022-04-20    0
2022-04-21    0
2022-04-22    0
2022-04-23    0
2022-04-24    0
Freq: D, Name: predicted_mean, dtype: int64

In [62]:
y_forecast_confirmed_next_7_days[0]

2407

In [76]:
data_val = []
for i in range(len(y_forecast_confirmed_next_7_days)):
    a = y_forecast_confirmed_next_7_days[i]
    b = y_forecast_deceased_next_7_days[i]
    c = y_forecast_recovered_next_7_days[i]
    l = []
    l.append(dates[i])
    l.append(a)
    l.append(b)
    l.append(c)
    data_val.append(l)

In [77]:
data_val

[[Timestamp('2022-04-18 00:00:00'), 2407, 79, 0],
 [Timestamp('2022-04-19 00:00:00'), 2448, 79, 0],
 [Timestamp('2022-04-20 00:00:00'), 2614, 79, 0],
 [Timestamp('2022-04-21 00:00:00'), 2675, 79, 0],
 [Timestamp('2022-04-22 00:00:00'), 2804, 79, 0],
 [Timestamp('2022-04-23 00:00:00'), 2871, 79, 0],
 [Timestamp('2022-04-24 00:00:00'), 2975, 79, 0]]

In [79]:
headers = ['Date', 'Confirmed', 'Deaths', 'Recovered']
df_next_seven_days = pd.DataFrame(data_val, columns=headers)
df_next_seven_days

Unnamed: 0,Date,Confirmed,Deaths,Recovered
0,2022-04-18,2407,79,0
1,2022-04-19,2448,79,0
2,2022-04-20,2614,79,0
3,2022-04-21,2675,79,0
4,2022-04-22,2804,79,0
5,2022-04-23,2871,79,0
6,2022-04-24,2975,79,0
