In [3]:
import numpy as np
from data_loader import *
import pandas as pd
from prophet import Prophet
import matplotlib.pyplot as pyplot
from sklearn.metrics import mean_absolute_error
from prophet.diagnostics import cross_validation
import itertools
from prophet.diagnostics import performance_metrics

In [5]:
# Read the raw price history from CSV, then turn the textual Date column
# (ISO `YYYY-MM-DD`) into real datetimes so it can be compared and filtered.
df_original = pd.read_csv("data/MSFT.csv")
df_original = df_original.assign(
    Date=pd.to_datetime(df_original["Date"], format="%Y-%m-%d")
)
print(df_original.head())

        Date      Open      High       Low     Close  Adj Close      Volume
0 1986-03-13  0.088542  0.101563  0.088542  0.097222   0.061751  1031788800
1 1986-03-14  0.097222  0.102431  0.097222  0.100694   0.063956   308160000
2 1986-03-17  0.100694  0.103299  0.100694  0.102431   0.065059   133171200
3 1986-03-18  0.102431  0.103299  0.098958  0.099826   0.063405    67766400
4 1986-03-19  0.099826  0.100694  0.097222  0.098090   0.062302    47894400


In [7]:
# Keep only the date and adjusted close, renamed to the `ds` / `y` column
# names that the Prophet model is fitted on.
prophet_df = df_original[["Date", "Adj Close"]].rename(
    columns={"Date": "ds", "Adj Close": "y"}
)
print(prophet_df.head())

# Chronological split: everything before 2019 is training data,
# calendar year 2019 is held out as the test period.
train_mask = prophet_df["ds"] < "2019-01-01"
test_mask = (~train_mask) & (prophet_df["ds"] < "2020-01-01")

train_set = prophet_df.loc[train_mask]
test_set = prophet_df.loc[test_mask]

          ds         y
0 1986-03-13  0.061751
1 1986-03-14  0.063956
2 1986-03-17  0.065059
3 1986-03-18  0.063405
4 1986-03-19  0.062302


In [12]:
# Grid of candidate values for the two Prophet prior scales being tuned.
param_grid = {
    'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
}

# Generate all combinations of parameters
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
rmses = []  # Store the RMSEs for each params here

# NOTE(review): every candidate is fitted with Prophet's default seasonality
# settings, whereas the model fitted later in this notebook passes
# seasonality_mode='multiplicative' -- confirm the scores transfer to that model.

# Use cross validation to evaluate all parameters
for params in all_params:
    m = Prophet(**params).fit(train_set)  # Fit model with given params
    df_cv = cross_validation(m, initial='11680 days', period='20 days', horizon='7 days', parallel="processes")
    # rolling_window=1 aggregates over the whole horizon, so the first row
    # holds the single overall RMSE for this parameter combination.
    df_p = performance_metrics(df_cv, rolling_window=1)
    rmses.append(df_p['rmse'].values[0])

# Tabulate the score of every combination
tuning_results = pd.DataFrame(all_params)
tuning_results['rmse'] = rmses
print(tuning_results)

# Find the best parameters (lowest cross-validated RMSE)
best_params = all_params[np.argmin(rmses)]
print(best_params)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 15 forecasts with cutoffs between 2018-03-19 00:00:00 and 2018-12-24 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7f8981147640>
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 15 forecasts with cutoffs between 2018-03-19 00:00:00 and 2018-12-24 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7f8981147640>
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 15 forecasts with cutoffs between 2018-03-19 00:00:00 and 2018-12-24 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7f89383f1700>
INFO:prophet:Disabling daily seasonality. Run prophet with daily_se

In [8]:
# Build the model with multiplicative seasonality and the chosen prior
# scales, fit it on the training split, then cross-validate it with a
# 7-day horizon and a new cutoff every 90 days.
m = Prophet(
    seasonality_mode="multiplicative",
    changepoint_prior_scale=0.5,
    seasonality_prior_scale=10,
).fit(train_set)
df_cv = cross_validation(
    m,
    initial="11680 days",
    period="90 days",
    horizon="7 days",
)


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 4 forecasts with cutoffs between 2018-03-29 00:00:00 and 2018-12-24 00:00:00


  0%|          | 0/4 [00:00<?, ?it/s]

In [9]:
# Summarise the cross-validation errors per forecast horizon and show the
# rows for the longest horizons. `performance_metrics` comes from the
# notebook's import cell, so it is not imported again here.
df_p = performance_metrics(df_cv)
df_p.tail()

Unnamed: 0,horizon,mse,rmse,mae,mape,mdape,smape,coverage
2,3 days,485.486928,22.033768,19.305083,0.179172,0.179172,0.202298,0.0
3,4 days,139.549892,11.813124,11.342942,0.12726,0.12726,0.137137,0.0
4,5 days,379.930901,19.491816,19.229586,0.209203,0.209203,0.234014,0.0
5,6 days,625.960985,25.019212,24.314496,0.24286,0.227109,0.277287,0.0
6,7 days,504.003199,22.450016,20.427069,0.201498,0.214433,0.228326,0.0


In [16]:
# Generate the 7 dates that follow the fitted history (history itself
# excluded), drop Saturdays and Sundays, and predict for the remaining days.
future_days = m.make_future_dataframe(periods=7, include_history=False)
is_weekday = future_days["ds"].dt.weekday < 5
test_dates = future_days.loc[is_weekday]
forecast = m.predict(test_dates)

print(forecast[["ds", "yhat", "yhat_lower", "yhat_upper"]].tail())

          ds       yhat  yhat_lower  yhat_upper
0 2019-01-01  89.780009   84.500543   95.109402
1 2019-01-02  89.546778   84.516299   94.597111
2 2019-01-03  89.662565   84.754267   94.884150
3 2019-01-04  89.644507   84.473708   94.996413
4 2019-01-07  89.410696   84.609653   94.699849
