In [41]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sktime.forecasting.base                import ForecastingHorizon
from sktime.forecasting.trend               import ProphetPiecewiseLinearTrendForecaster
from sktime.forecasting.model_selection     import temporal_train_test_split
from sktime.forecasting.fbprophet           import Prophet
from sktime.performance_metrics.forecasting import MeanSquaredError

from darts.datasets import ETTh1Dataset
series = ETTh1Dataset().load()
data = series.pd_dataframe()

import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)

In [42]:
np.random.seed(1000)
mse = MeanSquaredError(square_root=True)

def fitPredictProphets(y_train,y_test,scale):
    fh = ForecastingHorizon(y_test.index, is_relative=False)
    current =  ProphetPiecewiseLinearTrendForecaster(changepoint_prior_scale=scale)
    suggestion = Prophet(
                        daily_seasonality=False,
                        weekly_seasonality=False,
                        yearly_seasonality=False,
                        changepoint_prior_scale=scale
                        )
    
    y_pred_current  = current.fit(y_train).predict(fh)
    y_pred_suggestion  = suggestion.fit(y_train).predict(fh)
    return y_pred_current,y_pred_suggestion

results_current = []
results_suggestion = []

for scale in np.logspace(0.001,0.5,4):
    for freq in ["H","6H","12H","D","w","M","Q","Y"]:
        for column in data.columns:
            y = data[column].resample(freq).sum()
            y_train, y_test = temporal_train_test_split(y)
            y_pred_current,y_pred_suggested = fitPredictProphets(y_train,y_test,scale)           
            results_current.append(mse(y_test,y_pred_current))
            results_suggestion.append(mse(y_test,y_pred_suggested))

            # fig, ax = plt.subplots(figsize=(10, 2))
            # plt.plot(y, color='grey', linestyle="--", label="y")
            # plt.plot(y_pred_pwl,label="prediction pwl", alpha=0.5, linewidth=2)
            # plt.plot(y_pred_pNoSeason,label="prediction prophet no season", alpha=0.5, linewidth=2)
            # plt.legend()
            # plt.show()

17:26:57 - cmdstanpy - INFO - Chain [1] start processing
17:27:02 - cmdstanpy - INFO - Chain [1] done processing
17:27:02 - cmdstanpy - INFO - Chain [1] start processing
17:27:06 - cmdstanpy - INFO - Chain [1] done processing
17:27:07 - cmdstanpy - INFO - Chain [1] start processing


KeyboardInterrupt: 

In [None]:
total = len(np.array(results_current))
current_wins = np.sum(np.array(results_current)<np.array(results_suggestion))
suggestion_wins = np.sum(np.array(results_current)>np.array(results_suggestion))
ties = np.sum(np.array(results_current)==np.array(results_suggestion))

print(f"Out of {total} runs: \nThe current implementation is more accurate in "+
      f"{current_wins} ({np.round_(current_wins/total*100,1)}%) cases.\n"+
      f"The suggested change is more accurate in {suggestion_wins}"+
      f"({np.round_(suggestion_wins/total*100,1)}%) cases.\n"+
      f"In {ties} ({np.round_(ties/total*100,1)}%) cases they return the same value.")

Out of 168 runs: 
The current implementation is more accurate in 52 (31.0%) cases.
The suggested change is more accurate in 32(19.0%) cases.
In 84 (50.0%) cases they return the same value.


In [93]:
def Randomwalk1D(n): 
   y = 0
   yposition = [0] 
   for i in range(n-1):
       step = np.random.uniform(0,1)
       if step < 0.5: # if step is less than 0.5 we move up    
           y += 1  
       if step > 0.5: # if step is greater than 0.5 we move down  
           y += -1 
       yposition.append(y)
   return yposition

n = 1000 
index = pd.date_range(start='2016-01-01', periods = n, freq='15min')


results_current = []
results_suggestion = []

for scale in np.logspace(0.001,0.5,5):
    for i in range(100):
        y = pd.Series(data=Randomwalk1D(n), index=index)
        y_train, y_test = temporal_train_test_split(y)
        y_pred_current,y_pred_suggested = fitPredictProphets(y_train,y_test,scale)           
        results_current.append(mse(y_test,y_pred_current))
        results_suggestion.append(mse(y_test,y_pred_suggested))


18:02:43 - cmdstanpy - INFO - Chain [1] start processing
18:02:44 - cmdstanpy - INFO - Chain [1] done processing
18:02:44 - cmdstanpy - INFO - Chain [1] start processing
18:02:44 - cmdstanpy - INFO - Chain [1] done processing
18:02:44 - cmdstanpy - INFO - Chain [1] start processing
18:02:45 - cmdstanpy - INFO - Chain [1] done processing
18:02:45 - cmdstanpy - INFO - Chain [1] start processing
18:02:45 - cmdstanpy - INFO - Chain [1] done processing
18:02:45 - cmdstanpy - INFO - Chain [1] start processing
18:02:46 - cmdstanpy - INFO - Chain [1] done processing
18:02:46 - cmdstanpy - INFO - Chain [1] start processing
18:02:46 - cmdstanpy - INFO - Chain [1] done processing
18:02:46 - cmdstanpy - INFO - Chain [1] start processing
18:02:47 - cmdstanpy - INFO - Chain [1] done processing
18:02:47 - cmdstanpy - INFO - Chain [1] start processing
18:02:47 - cmdstanpy - INFO - Chain [1] done processing
18:02:47 - cmdstanpy - INFO - Chain [1] start processing
18:02:47 - cmdstanpy - INFO - Chain [1]

In [94]:
total = len(np.array(results_current))
current_wins = np.sum(np.array(results_current)<np.array(results_suggestion))
suggestion_wins = np.sum(np.array(results_current)>np.array(results_suggestion))
ties = np.sum(np.array(results_current)==np.array(results_suggestion))

print(f"Out of {total} runs: \nThe current implementation is more accurate in "+
      f"{current_wins} ({np.round_(current_wins/total*100,1)}%) cases.\n"+
      f"The suggested change is more accurate in {suggestion_wins}"+
      f"({np.round_(suggestion_wins/total*100,1)}%) cases.\n"+
      f"In {ties} ({np.round_(ties/total*100,1)}%) cases they return the same value.")

Out of 500 runs: 
The current implementation is more accurate in 177 (35.4%) cases.
The suggested change is more accurate in 323(64.6%) cases.
In 0 (0.0%) cases they return the same value.
