In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from fbprophet import Prophet
from sklearn.metrics import mean_squared_error, mean_absolute_error
# Apply matplotlib's 'fivethirtyeight' style sheet to the figures created in later cells.
plt.style.use('fivethirtyeight')

In [2]:
# Load the CSV, using its first column as the index and parsing that column as dates.
# NOTE(review): later cells rename a 'date' column after reset_index(), so the
# first column is presumably named 'date' — confirm against the file.
covid = pd.read_csv('../input/kr-covid19/covid-19.csv', index_col = [0], parse_dates = [0])
# Preview the first rows of the loaded frame.
covid.head()



In [3]:
# Eight-colour palette; only the third entry is used for the trend plot below.
color_pal = [
    "#F8766D", "#D39200", "#93AA00", "#00BA38",
    "#00C19F", "#00B9E3", "#619CFF", "#DB72FB",
]

# Dot plot of the 'new_cases' column against the frame's index.
covid['new_cases'].plot(
    style='.',
    figsize=(20, 5),
    color=color_pal[2],
    title='New confirmed Trend',
)

# Last expression of the cell: list the available columns.
covid.columns

In [4]:
def create_features(df, label = None):
    """
    Derive calendar features from the datetime index of ``df``.

    A copy of ``df`` is extended with a ``date`` column (taken from the index)
    and seven calendar columns computed from it; the input frame itself is
    left untouched.

    Parameters
    ----------
    df : DataFrame whose index supports the ``.dt`` accessor once stored as a
        column (i.e. a datetime-like index).
    label : optional column selector. When truthy, ``df[label]`` is returned
        alongside the features.

    Returns
    -------
    X, or the tuple ``(X, y)`` when ``label`` is given, where X holds the
    columns dayofweek, quarter, month, year, dayofyear, dayofmonth and
    weekofyear (ISO week number).
    """
    frame = df.copy()
    frame['date'] = frame.index
    stamp = frame['date']

    # Attributes whose output column carries the same name as the accessor.
    for attr in ('dayofweek', 'quarter', 'month', 'year', 'dayofyear'):
        frame[attr] = getattr(stamp.dt, attr)

    # These two use a column name that differs from the accessor.
    frame['dayofmonth'] = stamp.dt.day
    frame['weekofyear'] = stamp.dt.isocalendar().week

    feature_cols = ['dayofweek', 'quarter', 'month', 'year',
                    'dayofyear', 'dayofmonth', 'weekofyear']
    X = frame[feature_cols]

    if not label:
        return X
    return X, frame[label]
    

In [5]:
# Build the calendar features (X) and pull out the 'new_cases' column (y).
X, y = create_features(covid, label='new_cases')
# Place features and target side by side (column-wise concat) for exploration.
features_and_target = pd.concat([X,y], axis = 1)

In [6]:
# Preview the first rows of the combined feature/target frame.
features_and_target.head()

In [7]:
# Scatter 'new_cases' against three calendar features; rows with missing
# values are dropped before plotting.
sns.pairplot(
    features_and_target.dropna(),
    x_vars=['dayofweek', 'year', 'weekofyear'],
    y_vars='new_cases',
    height=5,
    plot_kws={'alpha': 0.15, 'linewidth': 0},
)

In [8]:
# Cut-off date: rows on or before it form the training set, later rows the test set.
split_date = '01-Feb-2021'
covid_train = covid[covid.index <= split_date].copy()
covid_test = covid[covid.index > split_date].copy()

# Same partitions without the 'total_cases' column.
a = covid_train.drop(columns=['total_cases'])
b = covid_test.drop(columns=['total_cases'])

In [9]:
# Overlay the training and test partitions of 'new_cases' on one time axis.
# Each frame's column is renamed so the legend tells the two sets apart, and
# the outer join keeps every index value that appears in either frame.
# The title previously read 'Sales ' (a leftover from another analysis); it
# now describes what is actually plotted.
(
    b.rename(columns={'new_cases': 'TEST SET'})
     .join(a.rename(columns={'new_cases': 'TRAINING SET'}), how='outer')
     .plot(figsize=(15, 5),
           title='New confirmed cases: training vs. test set',
           style='.')
)
plt.show()

In [10]:
# Reshape the training frame into the two-column layout passed to Prophet:
# the index becomes a regular column, then 'date' -> 'ds' and 'new_cases' -> 'y'.
covid_train1 = (
    a.reset_index()
     .rename(columns={'date': 'ds', 'new_cases': 'y'})
)
covid_train1

In [11]:
# Create a Prophet model with its default settings and fit it on the training frame.
model = Prophet()
model.fit(covid_train1)

In [12]:
# Predict for the test rows: the index is turned back into a column and
# renamed from 'date' to 'ds' before being handed to the model.
covid_fcst = model.predict(
    df=(
        b.reset_index()
         .rename(columns={'date': 'ds'})
    )
)

In [13]:
# NOTE(review): this is not the last expression of the cell, so the preview
# is evaluated but not rendered; move it to its own cell if it should be shown.
covid_fcst.head()

# Draw the forecast on a 15x5 inch figure.
f, ax = plt.subplots(1)
f.set_size_inches(15, 5)
fig = model.plot(covid_fcst, ax=ax)
plt.show()

In [14]:
# Plot the forecast's components; the returned figure is bound to `fig`
# so the cell has no trailing expression to echo.
fig = model.plot_components(covid_fcst)

In [15]:
# Forecast over the test period with the observed test values overlaid in red.
f, ax = plt.subplots(1)
f.set_size_inches(15, 5)
ax.scatter(b.index, b['new_cases'], color='r')
fig = model.plot(covid_fcst, ax=ax)

In [16]:
# Same overlay as the previous figure, zoomed to a sub-range of the test period.
f, ax = plt.subplots(1)
f.set_size_inches(15, 5)
ax.scatter(b.index, b['new_cases'], color='r')
f = model.plot(covid_fcst, ax=ax)

# NOTE(review): positions 100 and 200 are hard-coded, so this assumes the test
# frame has at least 201 rows — confirm, or derive the window from dates.
ax.set_xbound(b.index[100], b.index[200])
ax.set_ylim(0, 3000)
plot = plt.suptitle('Forecast 3 months vs Actuals')

In [17]:
# Mean squared error of the forecast ('yhat') against the observed test values.
mean_squared_error(y_true=b['new_cases'], y_pred=covid_fcst['yhat'])

In [18]:
# Mean absolute error of the forecast ('yhat') against the observed test values.
mean_absolute_error(y_true=b['new_cases'], y_pred=covid_fcst['yhat'])

In [19]:
def mean_absolute_percentage_error(y_true, y_pred): 
    """Calculates MAPE (as a percentage) given y_true and y_pred.

    Inputs are converted to float arrays and compared position by position
    (any pandas index is ignored). Observations whose actual value is exactly
    zero are left out of the average: their percentage error is undefined and
    would otherwise turn the whole result into ``inf`` or ``nan``.

    Parameters
    ----------
    y_true : array-like of actual values.
    y_pred : array-like of predicted values, broadcastable against ``y_true``.

    Returns
    -------
    float
        Mean of ``|y_true - y_pred| / |y_true|`` over the non-zero actuals,
        multiplied by 100. ``nan`` when there is no non-zero actual value
        (including empty input).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Bring both arrays to a common shape so one mask can index them both.
    y_true, y_pred = np.broadcast_arrays(y_true, y_pred)

    nonzero = y_true != 0
    if not nonzero.any():
        return np.nan

    relative_error = (y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero]
    return np.mean(np.abs(relative_error)) * 100

# MAPE of the forecast ('yhat') against the observed test values, in percent.
mean_absolute_percentage_error(y_true=b['new_cases'], y_pred=covid_fcst['yhat'])