In [32]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
import tqdm as tqdm

import matplotlib.pyplot as plt

%matplotlib inline 

import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import sklearn


# special matplotlib argument for improved plots
from matplotlib import rcParams

In [33]:
data = pd.read_csv("MSN Historical Data.csv")
data.head()

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2017/12/1,68500,67000,69400,66500,725370,0.02
1,2017/12/4,69000,70000,70000,68800,1190000,0.01
2,2017/12/5,68900,69000,70100,68600,777070,0.0
3,2017/12/6,67700,68900,69000,66000,1080000,-0.02
4,2017/12/7,69000,68600,69800,67300,1560000,0.02


In [34]:
dataset = data.loc[:,["Date","Price"]]
dataset.head()

Unnamed: 0,Date,Price
0,2017/12/1,68500
1,2017/12/4,69000
2,2017/12/5,68900
3,2017/12/6,67700
4,2017/12/7,69000


In [35]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1249 entries, 0 to 1248
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    1249 non-null   object
 1   Price   1249 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 19.6+ KB


In [36]:
dataset.Date = pd.to_datetime(dataset.Date,format = "%Y/%m/%d")
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1249 entries, 0 to 1248
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    1249 non-null   datetime64[ns]
 1   Price   1249 non-null   int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 19.6 KB


In [37]:
dataset = dataset.rename(columns = {"Price" : "y"})
dataset = dataset.rename(columns = {"Date" : "ds"})
dataset.head()

Unnamed: 0,ds,y
0,2017-12-01,68500
1,2017-12-04,69000
2,2017-12-05,68900
3,2017-12-06,67700
4,2017-12-07,69000


In [38]:
test_days = 124
training_set = dataset.iloc[:-test_days, :]
test_set = dataset.iloc[-test_days:, :]
test_set.tail(5)

Unnamed: 0,ds,y
1244,2022-11-25,94000
1245,2022-11-28,97200
1246,2022-11-29,99000
1247,2022-11-30,102000
1248,2022-12-01,99000


In [39]:
print(len(training_set))
print(len(test_set))

1125
124


In [40]:
training_set.tail(5)

Unnamed: 0,ds,y
1120,2022-06-02,113465
1121,2022-06-03,111976
1122,2022-06-06,116145
1123,2022-06-07,117039
1124,2022-06-08,118428


In [41]:
m = Prophet(growth = "linear",
            yearly_seasonality = True,
            weekly_seasonality = True,
            daily_seasonality = False,
            seasonality_mode = "multiplicative",
            seasonality_prior_scale = 10,
            holidays_prior_scale = 10,
            changepoint_prior_scale = 0.05)
m.fit(training_set)

<fbprophet.forecaster.Prophet at 0x7fa1579e19d0>

In [42]:
future = m.make_future_dataframe(periods = len(test_set), freq = "D")
future.tail()

Unnamed: 0,ds
1244,2022-10-06
1245,2022-10-07
1246,2022-10-08
1247,2022-10-09
1248,2022-10-10


In [43]:
future = pd.concat([future, dataset.iloc[:,1:]],
                   axis = 1)
future.tail(5)

Unnamed: 0,ds,y
1244,2022-10-06,94000
1245,2022-10-07,97200
1246,2022-10-08,99000
1247,2022-10-09,102000
1248,2022-10-10,99000


In [44]:
forecast = m.predict(future)
forecast.head()

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,additive_terms,additive_terms_lower,additive_terms_upper,yhat
0,2017-12-01,8589.184447,62064.413973,72224.425502,8589.184447,8589.184447,6.830328,6.830328,6.830328,6.199174,6.199174,6.199174,0.631154,0.631154,0.631154,0.0,0.0,0.0,67256.129112
1,2017-12-04,8742.796319,63288.324056,73342.505497,8742.796319,8742.796319,6.833288,6.833288,6.833288,6.185095,6.185095,6.185095,0.648193,0.648193,0.648193,0.0,0.0,0.0,68484.843143
2,2017-12-05,8794.000276,64124.566616,74269.995214,8794.000276,8794.000276,6.854197,6.854197,6.854197,6.198393,6.198393,6.198393,0.655804,0.655804,0.655804,0.0,0.0,0.0,69069.807594
3,2017-12-06,8845.204233,64647.217585,74764.909758,8845.204233,8845.204233,6.873279,6.873279,6.873279,6.209056,6.209056,6.209056,0.664224,0.664224,0.664224,0.0,0.0,0.0,69640.765072
4,2017-12-07,8896.40819,64985.496915,75116.32992,8896.40819,8896.40819,6.876142,6.876142,6.876142,6.202804,6.202804,6.202804,0.673338,0.673338,0.673338,0.0,0.0,0.0,70069.369842


In [45]:
predictions_prophet = forecast.yhat[-test_days:].rename("prophet")
predictions_prophet[:5]

1125    108269.167867
1126    108030.942127
1127   -257069.623842
1128   -257319.097190
1129    107240.834026
Name: prophet, dtype: float64

In [46]:
from fbprophet.diagnostics import cross_validation
cs = cross_validation(m,
                         #Dự đoán 30 ngày tiếp theo
                         horizon = '30 days',
                         #Lấy 540 ngày dể đào tạo
                         initial = '540 days',
                         #Xử lý song song
                         parallel = "processes")

INFO:fbprophet:Making 73 forecasts with cutoffs between 2019-05-25 00:00:00 and 2022-05-09 00:00:00
INFO:fbprophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fa16bff9640>


In [47]:
y_true = cs['y'].values
y_pre = cs['yhat'].values

In [48]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
mae = mean_absolute_error(y_true,y_pre)
mape = mean_absolute_percentage_error(y_true,y_pre)
mse = mean_squared_error(y_true, y_pre)
rmse = np.sqrt(mse)
r2 = r2_score(y_true,y_pre)

print(f"MAE: {mae:.2f}")
print(f"MAPE: {mape * 100:.2f}%")
print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R2: {r2:.2f}")

MAE: 7718.05
MAPE: 9.91%
MSE: 104272510.01
RMSE: 10211.39
R2: 0.87


In [49]:
from sklearn.model_selection import ParameterGrid
param_grid = {'seasonality_prior_scale': np.arange(5, 10, 5), #[5, 10]  np.arange(5, 20, 5),
              'changepoint_prior_scale': np.arange(0.01, 0.05, 0.01),  # [0.01, 0.02, 0.03, 0.04, 0.05]
              'holidays_prior_scale': np.arange(5, 10, 5)}#[5, 10]
grid = ParameterGrid(param_grid)
list(grid)[:3]  

[{'changepoint_prior_scale': 0.01,
  'holidays_prior_scale': 5,
  'seasonality_prior_scale': 5},
 {'changepoint_prior_scale': 0.02,
  'holidays_prior_scale': 5,
  'seasonality_prior_scale': 5},
 {'changepoint_prior_scale': 0.03,
  'holidays_prior_scale': 5,
  'seasonality_prior_scale': 5}]

In [None]:
rmse = []
for params in grid:
  #build model
  m = Prophet(growth = "linear",
            yearly_seasonality = True,
            weekly_seasonality = True,
            daily_seasonality = False,
            seasonality_mode = "multiplicative",
            seasonality_prior_scale = params['seasonality_prior_scale'],
            holidays_prior_scale = params['holidays_prior_scale'],
            changepoint_prior_scale = params['changepoint_prior_scale'])
  m.fit(training_set)

  #cross-validation
  cs = cross_validation(m,
                         horizon = '30 days',
                         initial = '540 days',
                         parallel = "processes")
  
  #gather the results
  error = np.sqrt(mean_squared_error(cs['y'], 
                                     cs['yhat']))
  rmse.append(error)

INFO:fbprophet:Making 73 forecasts with cutoffs between 2019-05-25 00:00:00 and 2022-05-09 00:00:00
INFO:fbprophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fa156e80370>
INFO:fbprophet:Making 73 forecasts with cutoffs between 2019-05-25 00:00:00 and 2022-05-09 00:00:00
INFO:fbprophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fa16f270a60>
INFO:fbprophet:Making 73 forecasts with cutoffs between 2019-05-25 00:00:00 and 2022-05-09 00:00:00
INFO:fbprophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fa156e6d9d0>
INFO:fbprophet:Making 73 forecasts with cutoffs between 2019-05-25 00:00:00 and 2022-05-09 00:00:00
INFO:fbprophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fa156e7a610>


In [None]:
best_params = grid[np.argmin(rmse)]
print(best_params)

{'seasonality_prior_scale': 5, 'holidays_prior_scale': 5, 'changepoint_prior_scale': 0.04}


In [None]:
m = Prophet(growth = "linear",
          yearly_seasonality = True,
          weekly_seasonality = True,
          daily_seasonality = False,
          seasonality_mode = "multiplicative",
          seasonality_prior_scale = best_params['seasonality_prior_scale'],
          holidays_prior_scale = best_params['holidays_prior_scale'],
          changepoint_prior_scale = best_params['changepoint_prior_scale'])
m.fit(training_set)

<fbprophet.forecaster.Prophet at 0x7fa156e80040>

In [None]:
future = m.make_future_dataframe(periods = len(test_set),
                                 freq = "D")
future = pd.concat([future, dataset.iloc[:,1:]],
                   axis = 1)
future.tail(1)

Unnamed: 0,ds,y
1248,2022-10-10,99000


In [None]:
forecast = m.predict(future)
forecast.head()

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,additive_terms,additive_terms_lower,additive_terms_upper,yhat
0,2017-12-01,7278.153939,62148.091953,72167.359403,7278.153939,7278.153939,8.227185,8.227185,8.227185,7.476875,7.476875,7.476875,0.750311,0.750311,0.750311,0.0,0.0,0.0,67156.87496
1,2017-12-04,7409.713029,63329.020254,73159.373187,7409.713029,7409.713029,8.230061,8.230061,8.230061,7.460304,7.460304,7.460304,0.769758,0.769758,0.769758,0.0,0.0,0.0,68392.106676
2,2017-12-05,7453.56606,64217.381469,73853.843848,7453.56606,7453.56606,8.254393,8.254393,8.254393,7.475892,7.475892,7.475892,0.778501,0.778501,0.778501,0.0,0.0,0.0,68978.232429
3,2017-12-06,7497.41909,64494.80279,74257.590364,7497.41909,7497.41909,8.27676,8.27676,8.27676,7.48857,7.48857,7.48857,0.78819,0.78819,0.78819,0.0,0.0,0.0,69551.760377
4,2017-12-07,7541.27212,65086.976553,74967.21282,7541.27212,7541.27212,8.279849,8.279849,8.279849,7.481159,7.481159,7.481159,0.79869,0.79869,0.79869,0.0,0.0,0.0,69981.86484


In [None]:
print(len(forecast['yhat']))
print(len(forecast.yhat[-test_days:]))
print(len(test_set["ds"]))

1249
124
124


In [None]:
predictions_prophet_tuned = forecast.yhat[-test_days:].rename("prophet_tuned")
predictions_prophet_tuned.index = test_set["ds"]
predictions_prophet_tuned.head()

ds
2022-06-09    108288.311241
2022-06-10    108039.708981
2022-06-13   -265643.319283
2022-06-14   -265903.254456
2022-06-15    107219.364499
Name: prophet_tuned, dtype: float64