In [None]:
import pandas as pd
import numpy as np
import datetime
import requests
import warnings

import matplotlib.pyplot as plt
import matplotlib
import matplotlib.dates as mdates
import seaborn as sns
import squarify
import plotly.offline as py
import plotly_express as px

from random import shuffle
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import OrdinalEncoder
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from statsmodels.tsa.arima_model import ARIMA
from fbprophet import Prophet
from fbprophet.plot import plot_plotly, add_changepoints_to_plot

from IPython.display import Image
warnings.filterwarnings('ignore')
%matplotlib inline

# Disabled loaders for the India-specific datasets. None of the cells visible
# in this part of the notebook read these frames.
# age_details = pd.read_csv('../input/covid19-in-india/AgeGroupDetails.csv')
# india_covid_19 = pd.read_csv('../input/covid19-in-india/covid_19_india.csv')
# hospital_beds = pd.read_csv('../input/covid19-in-india/HospitalBedsIndia.csv')
# individual_details = pd.read_csv('../input/covid19-in-india/IndividualDetails.csv')
# ICMR_labs = pd.read_csv('../input/covid19-in-india/ICMRTestingLabs.csv')
# state_testing = pd.read_csv('../input/covid19-in-india/StatewiseTestingDetails.csv')
# population = pd.read_csv('../input/covid19-in-india/population_india_census2011.csv')

# Global confirmed-cases time series; this is the only dataset actually loaded.
# The forecasting cells group it by 'Country/Region' and treat every column
# from position 4 onward as a date.
confirmed_df = pd.read_csv('../input/time-series-covid19-confirmed-globalcsv/time_series_covid19_confirmed_global.csv')

# Disabled date parsing for the India frames that are commented out above.
# india_covid_19['Date'] = pd.to_datetime(india_covid_19['Date'],dayfirst = True)
# state_testing['Date'] = pd.to_datetime(state_testing['Date'])

## Prediction using Prophet Model <a id="10"></a>

In [None]:
# Build Germany's cumulative confirmed-case series in the two-column
# ('ds', 'y') layout that Prophet is fitted on.
# Every column from position 4 onward is treated as a date column, and the
# same list is reused to select the values so the two can never disagree.
date_columns = list(confirmed_df.columns[4:])
dates = list(pd.to_datetime(date_columns))

# One row per country. numeric_only=True keeps non-numeric columns out of the
# sum instead of relying on them being dropped implicitly.
df1 = confirmed_df.groupby('Country/Region').sum(numeric_only=True).reset_index()
k = df1.loc[df1['Country/Region'] == 'Germany', date_columns]
if k.empty:
    # Fail with a readable message rather than an IndexError on the next line.
    raise ValueError("No rows found for Country/Region == 'Germany'")
germany_confirmed = k.values.tolist()[0]
data = pd.DataFrame({'ds': dates, 'y': germany_confirmed})

# Fit Prophet with default settings and forecast 60 days past the last observation.
prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=60)
prop_forecast = prop.predict(future)

# Interactive plotly view. It is displayed explicitly because its handle is
# not kept; `fig` below refers to the static matplotlib figure.
py.iplot(plot_plotly(prop, prop_forecast))
fig = prop.plot(prop_forecast, xlabel='Date', ylabel='Confirmed Cases')

## Prediction using ARIMA <a id="11"></a>

In [None]:
# Forecast horizon in days; drives the ARIMA forecast length, the prediction
# dates and the plot title so the three cannot drift apart.
FORECAST_DAYS = 60

# ARIMA(5, 1, 0) with a constant trend, fitted on the cumulative series.
# `arima` is the fitted results object.
arima = ARIMA(data['y'], order=(5, 1, 0)).fit(trend='c', full_output=True, disp=True)
forecast = arima.forecast(steps=FORECAST_DAYS)
pred = list(forecast[0])  # element 0 of the returned tuple is used as the point forecast

# One calendar date per forecast step, starting the day after the last
# observation. `start_date` keeps pointing at that last observed date.
start_date = data['ds'].max()
prediction_dates = [
    start_date + datetime.timedelta(days=offset)
    for offset in range(1, FORECAST_DAYS + 1)
]

plt.figure(figsize=(15, 10))
plt.xlabel("Dates", fontsize=20)
plt.ylabel('Total cases', fontsize=20)
plt.title("Predicted Values for the next {} Days".format(FORECAST_DAYS), fontsize=20)

# plt.plot with the 'o' marker draws what plt.plot_date drew by default;
# plot_date itself is deprecated in recent Matplotlib releases.
plt.plot(prediction_dates, pred, 'o', linestyle='dashed', color='#ff9999', label='Predicted')
plt.plot(data['ds'], data['y'], 'o', linestyle='-', color='blue', label='Actual')
plt.legend();

## LGBM Regressor <a id="12"></a>

In [None]:
# Calendar fields derived from the 'ds' timestamp, in the column order the
# regressors are trained and queried with.
CALENDAR_FEATURES = ['day', 'month', 'dayofweek', 'dayofyear', 'quarter', 'weekofyear']


def add_calendar_features(frame):
    """Add one column per entry of CALENDAR_FEATURES, derived from frame['ds'].

    The frame is modified in place and also returned, so the same helper
    serves both the training frame and the prediction frame.
    """
    stamps = frame['ds'].dt
    for field in CALENDAR_FEATURES:
        if field == 'weekofyear' and hasattr(stamps, 'isocalendar'):
            # `.dt.weekofyear` is deprecated since pandas 1.1 and removed in 2.0.
            # isocalendar().week is the replacement; cast its nullable
            # unsigned dtype back to a plain integer column.
            frame[field] = stamps.isocalendar().week.astype('int64')
        else:
            frame[field] = getattr(stamps, field)
    return frame


# Training inputs: calendar features of every observed date, cumulative total as target.
add_calendar_features(data)
X_train = data[CALENDAR_FEATURES]
y_train = data['y']

# Prediction inputs: the same features for the future dates built in the ARIMA cell.
test = add_calendar_features(pd.DataFrame({'ds': prediction_dates}))
X_test = test[CALENDAR_FEATURES]

In [None]:
eps = 0.0001  # added before the log so a zero daily increase stays finite

# Daily increase of the cumulative series: element i is y[i + 1] - y[i], i.e.
# the cases added on day i + 1. A negative difference would turn the log into
# NaN, so it is clamped to zero first.
_daily_new = np.clip(np.diff(y_train), 0, None)
_y_train = np.log(_daily_new + eps)

# np.diff drops the first day, so the targets belong to days 1..n-1. Pair each
# target with the features of the day it was recorded on; the prediction cells
# add the value predicted for a date to the previous day's total, which
# assumes exactly this alignment.
_X_train = X_train[1:]

In [None]:
# Gradient-boosted trees trained on the log of the daily increase.
lgbm = LGBMRegressor(n_estimators=1000)
lgbm.fit(_X_train, _y_train)
pred = lgbm.predict(X_test)

# Undo the log transform to get daily increases, then accumulate them on top
# of the last observed total. Element 0 of `_pred_total` is that last observed
# total; elements 1.. line up with test['ds'].
_last_total = y_train.iloc[-1]
_daily = np.exp(np.asarray(pred, dtype=np.float64)) - eps
_pred_total = [_last_total] + (_last_total + np.cumsum(_daily)).tolist()

plt.figure(figsize=(15, 10))
plt.xlabel("Dates", fontsize=20)
plt.ylabel('Total cases', fontsize=20)
plt.title("Predicted Values for the next 60 Days", fontsize=20)

# plt.plot with the 'o' marker draws what plt.plot_date drew by default;
# plot_date itself is deprecated in recent Matplotlib releases.
plt.plot(test['ds'], _pred_total[1:], 'o', linestyle='dashed', color='#ff9999', label='Predicted')
plt.plot(data['ds'], y_train, 'o', linestyle='-', color='blue', label='Actual')
plt.legend();  # the labels above are only shown once a legend is drawn

## Random Forest Regressor <a id="13"></a>

In [None]:
# Random forest trained on the log of the daily increase. The forest is
# randomised, so the seed is fixed to make a re-run reproduce the same plot.
rf = RandomForestRegressor(n_estimators=1000, random_state=42)
rf.fit(_X_train, _y_train)
pred = rf.predict(X_test)

# Undo the log transform to get daily increases, then accumulate them on top
# of the last observed total. Element 0 of `_pred_total` is that last observed
# total; elements 1.. line up with test['ds'].
_last_total = y_train.iloc[-1]
_daily = np.exp(np.asarray(pred, dtype=np.float64)) - eps
_pred_total = [_last_total] + (_last_total + np.cumsum(_daily)).tolist()

plt.figure(figsize=(15, 10))
plt.xlabel("Dates", fontsize=20)
plt.ylabel('Total cases', fontsize=20)
plt.title("Predicted Values for the next 60 Days", fontsize=20)

# plt.plot with the 'o' marker draws what plt.plot_date drew by default;
# plot_date itself is deprecated in recent Matplotlib releases.
plt.plot(test['ds'], _pred_total[1:], 'o', linestyle='dashed', color='#ff9999', label='Predicted')
plt.plot(data['ds'], data['y'], 'o', linestyle='-', color='blue', label='Actual')
plt.legend();  # the labels above are only shown once a legend is drawn

## XGB Regressor <a id="14"></a>

In [None]:
# XGBoost regressor trained on the log of the daily increase.
xgb = XGBRegressor(n_estimators=1000)
xgb.fit(_X_train, _y_train)
pred = xgb.predict(X_test)

# Undo the log transform to get daily increases, then accumulate them on top
# of the last observed total. The predictions are promoted to float64 first so
# the running total is not accumulated in a narrower float type. Element 0 of
# `_pred_total` is the last observed total; elements 1.. line up with test['ds'].
_last_total = y_train.iloc[-1]
_daily = np.exp(np.asarray(pred, dtype=np.float64)) - eps
_pred_total = [_last_total] + (_last_total + np.cumsum(_daily)).tolist()

plt.figure(figsize=(15, 10))
plt.xlabel("Dates", fontsize=20)
plt.ylabel('Total cases', fontsize=20)
plt.title("Predicted Values for the next 60 Days", fontsize=20)

# plt.plot with the 'o' marker draws what plt.plot_date drew by default;
# plot_date itself is deprecated in recent Matplotlib releases.
plt.plot(test['ds'], _pred_total[1:], 'o', linestyle='dashed', color='#ff9999', label='Predicted')
plt.plot(data['ds'], data['y'], 'o', linestyle='-', color='blue', label='Actual')
plt.legend();  # the labels above are only shown once a legend is drawn