# Description: This notebook adds Holt-Winters exponential smoothing forecast inputs to our dataset.

In [None]:
#statsmodels.tsa.holtwinters.ExponentialSmoothing
#https://www.statsmodels.org/stable/examples/notebooks/generated/exponential_smoothing.html
#https://www.statsmodels.org/dev/generated/statsmodels.tsa.holtwinters.ExponentialSmoothing.html
# Grid search for Holt-Winters exponential smoothing.
# This model can be applied to all cases (simple, trend, seasonality, or any combination)
# by performing an exhaustive search over all the parameters and minimizing the error.
# This code is adapted from Jason Brownlee's Deep Learning for Time Series Forecasting.
from datetime import datetime
from math import sqrt
from multiprocessing import cpu_count
from joblib import Parallel
from joblib import delayed
from warnings import catch_warnings
from warnings import filterwarnings
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_squared_error
from pandas import read_csv
from numpy import array
import pandas as pd
import numpy as np

  import pandas.util.testing as tm


In [None]:
# one-step Holt Winter’s Exponential Smoothing forecast
def exp_smoothing_forecast(history, config):
    """Fit a Holt-Winters model on ``history`` and forecast one step ahead.

    Args:
        history: Sequence of observations; converted to a NumPy array before
            fitting.
        config: Six-item sequence ``(trend, damped, seasonal,
            seasonal_periods, use_boxcox, remove_bias)``. The first four are
            passed to ``ExponentialSmoothing`` and the last two to ``fit``.

    Returns:
        The first element of the prediction requested for index
        ``len(history)``.
    """
    trend, damped, seasonal, periods, use_boxcox, remove_bias = config
    observations = array(history)
    fitted = ExponentialSmoothing(
        observations,
        trend=trend,
        damped=damped,
        seasonal=seasonal,
        seasonal_periods=periods,
    ).fit(optimized=True, use_boxcox=use_boxcox, remove_bias=remove_bias)
    # Start and end of the prediction window coincide: a single step.
    next_index = len(observations)
    forecast = fitted.predict(next_index, next_index)
    return forecast[0]

# root mean squared error or rmse
def measure_rmse(actual, predicted):
    """Return the root mean squared error between ``actual`` and ``predicted``."""
    mse = mean_squared_error(actual, predicted)
    return sqrt(mse)

# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    """Split a univariate series into a leading train part and trailing test part.

    Args:
        data: Sliceable sequence of observations that supports ``len``.
        n_test: Number of trailing observations to hold out; must be >= 0.
            Values larger than ``len(data)`` put every observation in the
            test part.

    Returns:
        Tuple ``(train, test)`` where ``test`` holds the last ``n_test``
        observations and ``train`` holds everything before them.

    Raises:
        ValueError: If ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError('n_test must be non-negative, got %r' % (n_test,))
    # Slice at an explicit index instead of ``-n_test``: with n_test == 0 the
    # negative-index form gives an empty train set and puts all data in test.
    split_at = max(len(data) - n_test, 0)
    return data[:split_at], data[split_at:]

# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, cfg):
    """Score a configuration with walk-forward validation.

    The series is split with ``train_test_split``; for every held-out
    observation a one-step forecast is made from the history seen so far,
    after which the true observation is appended to that history.

    Args:
        data: Univariate series to split into train and test parts.
        n_test: Number of trailing observations to hold out.
        cfg: Model configuration forwarded to ``exp_smoothing_forecast``.

    Returns:
        The value of ``measure_rmse`` for the held-out observations against
        the collected one-step forecasts.
    """
    train, test = train_test_split(data, n_test)
    # Copy the training part so appending does not touch the caller's data.
    history = list(train)
    predictions = []
    for observed in test:
        predictions.append(exp_smoothing_forecast(history, cfg))
        # Reveal the true value only after forecasting it.
        history.append(observed)
    return measure_rmse(test, predictions)

# score a model, return None on failure
def score_model(data, n_test, cfg, debug=False):
    """Score one configuration and return ``(key, error)``.

    Args:
        data: Series forwarded to ``walk_forward_validation``.
        n_test: Number of trailing observations held out for validation.
        cfg: Model configuration; its ``str()`` form is used as the key.
        debug: When true, run validation without suppressing warnings and
            let any exception propagate.

    Returns:
        Tuple ``(str(cfg), error)``. ``error`` is ``None`` when validation
        raised an exception and ``debug`` is false.
    """
    key = str(cfg)
    result = None
    if debug:
        result = walk_forward_validation(data, n_test, cfg)
    else:
        # One failure during validation suggests an unstable config, so it is
        # reported as ``None`` rather than aborting the whole grid search.
        try:
            # Warnings are silenced while grid searching: too noisy.
            with catch_warnings():
                filterwarnings("ignore")
                result = walk_forward_validation(data, n_test, cfg)
        except Exception:
            # ``Exception`` (not a bare ``except``) so KeyboardInterrupt and
            # SystemExit can still stop a long-running search.
            result = None
    if result is not None:
        print(' > Model[%s] %.3f' % (key, result))
    return (key, result)

# grid search configs
def grid_search(data, cfg_list, n_test, parallel=False):
    """Score every configuration and return the successful ones, best first.

    Args:
        data: Series forwarded to ``score_model``.
        cfg_list: Iterable of model configurations to evaluate.
        n_test: Number of trailing observations held out for validation.
        parallel: When true, evaluate configurations through joblib's
            ``Parallel`` with one job per CPU; otherwise evaluate them
            sequentially.

    Returns:
        List of ``(key, error)`` tuples with a non-``None`` error, sorted by
        error in ascending order.
    """
    if parallel:
        executor = Parallel(n_jobs=cpu_count(), backend='multiprocessing')
        tasks = (delayed(score_model)(data, n_test, cfg) for cfg in cfg_list)
        scores = executor(tasks)
    else:
        scores = [score_model(data, n_test, cfg) for cfg in cfg_list]
    # Drop configurations whose scoring failed (error is None).
    scores = [entry for entry in scores if entry[1] is not None]
    scores.sort(key=lambda entry: entry[1])
    return scores

# create a set of exponential smoothing configs to try
def exp_smoothing_configs(seasonal=None):
    """Build every exponential smoothing configuration to try.

    Args:
        seasonal: Iterable of seasonal period values to include. ``None``
            (the default) is treated as ``[None]``, i.e. a single
            "no seasonal period" option.

    Returns:
        List of configurations, each a list
        ``[trend, damped, seasonal, seasonal_periods, use_boxcox, remove_bias]``.
        Ordering matches nested loops over those options in that order, with
        the last option varying fastest.
    """
    from itertools import product

    # A ``None`` sentinel avoids a mutable list as the default argument.
    p_params = [None] if seasonal is None else list(seasonal)
    t_params = ['add', 'mul', None]
    d_params = [True, False]
    s_params = ['add', 'mul', None]
    b_params = [True, False]
    r_params = [True, False]
    combos = product(t_params, d_params, s_params, p_params, b_params, r_params)
    # Configs stay lists (not tuples) so their ``str()`` form is unchanged.
    return [list(combo) for combo in combos]

if __name__ == '__main__':
    # Read the merged dataset, indexed by its first column, and keep only the
    # closing-price column (as a one-column frame).
    series = read_csv('merged.csv', header=0, index_col=0)[['Closing Price (USD)']]
    data = series.values
    # Number of trailing observations held out for walk-forward validation.
    n_test = 12
    # Candidate configurations, trying seasonal periods of 0, 6 and 12.
    cfg_list = exp_smoothing_configs(seasonal=[0, 6, 12])
    # Exhaustively score every configuration on the closing-price column.
    scores = grid_search(data[:, 0], cfg_list, n_test)
    print('done')
    # Report the three lowest-error configurations.
    top_three = scores[:3]
    for cfg, error in top_three:
        print(cfg, error)

 > Model[['add', True, 'add', 6, True, True]] 938.795
 > Model[['add', True, 'add', 6, True, False]] 938.084
 > Model[['add', True, 'add', 6, False, True]] 943.081
 > Model[['add', True, 'add', 6, False, False]] 941.558
 > Model[['add', True, 'add', 12, True, True]] 930.720
 > Model[['add', True, 'add', 12, True, False]] 930.583
 > Model[['add', True, 'add', 12, False, True]] 957.423
 > Model[['add', True, 'add', 12, False, False]] 956.273
 > Model[['add', True, 'mul', 6, True, True]] 1000.384
 > Model[['add', True, 'mul', 6, True, False]] 993.118
 > Model[['add', True, 'mul', 6, False, True]] 967.824
 > Model[['add', True, 'mul', 6, False, False]] 962.078
 > Model[['add', True, 'mul', 12, True, True]] 927.353
 > Model[['add', True, 'mul', 12, True, False]] 927.313
 > Model[['add', True, 'mul', 12, False, True]] 879.553
 > Model[['add', True, 'mul', 12, False, False]] 879.037
 > Model[['add', True, None, 0, True, True]] 821.976
 > Model[['add', True, None, 0, True, False]] 821.540
 > M

In [None]:
# The grid search shows that the configuration minimizing RMSE is:
# [None, False, 'mul', 12, False, True] 752.775262722884

# For reference:
# t,d,s,p,b,r = config
# t = trend
# d = damped
# s = seasonal
# p = seasonal periods
# b = use_boxcox
# r = remove_bias

In [None]:
# Refit an ExponentialSmoothing model on the full series using the best
# configuration found by the grid search (multiplicative seasonality with a
# period of 12, no Box-Cox transform, bias removal enabled).
btc_fit = ExponentialSmoothing(
    series,
    seasonal='mul',
    seasonal_periods=12,
).fit(optimized=True, use_boxcox=False, remove_bias=True)
# Put the observations next to the fitted level, slope, seasonal and
# fitted-value series, one column each, labelled in LaTeX notation.
internals_columns = [r'$y_t$', r'$l_t$', r'$b_t$', r'$s_t$', r'$\hat{y}_t$']
internals_matrix = np.c_[series, btc_fit.level, btc_fit.slope, btc_fit.season, btc_fit.fittedvalues]
btc_model_internals = pd.DataFrame(internals_matrix, columns=internals_columns, index=series.index)
btc_model_internals



Unnamed: 0_level_0,$y_t$,$l_t$,$b_t$,$s_t$,$\hat{y}_t$
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-04-10,6789.267500,6743.081021,0.0,1.008414,7734.108793
2018-04-17,8030.570000,7955.891970,0.0,1.012585,6701.513995
2018-04-24,9191.820000,8462.947703,0.0,1.086610,8595.667217
2018-05-01,8949.876240,7655.385737,0.0,1.170497,10033.381999
2018-05-08,9424.991240,7968.788517,0.0,1.182961,9027.673335
...,...,...,...,...,...
2020-09-15,10829.505307,10279.724209,0.0,1.054677,9719.397746
2020-09-22,10531.164563,10562.272600,0.0,0.997141,10232.349229
2020-09-29,10764.284371,10712.824141,0.0,1.004827,10610.547867
2020-10-06,10589.626394,10858.481640,0.0,0.975261,10446.169816


In [None]:
# The fitted ExponentialSmoothing model exposes the level, trend (slope) and
# seasonal components of the time series.
# We want to use these 3 components as inputs to our LSTM.
decomp_columns = ['level', 'trend', 'season']
decomp_matrix = np.c_[btc_fit.level, btc_fit.slope, btc_fit.season]
btc_model_decomp = pd.DataFrame(decomp_matrix, columns=decomp_columns, index=series.index)
btc_model_decomp

Unnamed: 0_level_0,level,trend,season
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-04-10,6743.081021,0.0,1.008414
2018-04-17,7955.891970,0.0,1.012585
2018-04-24,8462.947703,0.0,1.086610
2018-05-01,7655.385737,0.0,1.170497
2018-05-08,7968.788517,0.0,1.182961
...,...,...,...
2020-09-15,10279.724209,0.0,1.054677
2020-09-22,10562.272600,0.0,0.997141
2020-09-29,10712.824141,0.0,1.004827
2020-10-06,10858.481640,0.0,0.975261


In [None]:
# Let's combine these 3 columns with our original dataset (merged.csv).
original_data = pd.read_csv('merged.csv')
# Turn the index of the decomposition frame back into a regular column so it
# can serve as the join key.
btc_model_decomp.reset_index(level=0, inplace=True)
# Outer join on 'Date' keeps rows that appear in either frame.
merged = pd.merge(original_data, btc_model_decomp, how='outer', on='Date')
merged = merged.set_index('Date')

merged

Unnamed: 0_level_0,Closing Price (USD),active_addresses,hash_rate,btc_left,total_addresses,difficulty,total_fees,fed_assets,GLD,IYE,SLV,SPY,TLT,UUP,NYFed_inflation,Google_popularity,Pct_of_OI_Dealer_Long_All,Pct_of_OI_Dealer_Short_All,Pct_of_OI_Lev_Money_Long_All,Pct_of_OI_Lev_Money_Short_All,Pct_Change_Dealer_Net_Position,Pct_Change_Lev_Money_Net_Position,level,trend,season
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
2018-04-10,6789.267500,3758023,2.725244e+19,4029825.00,386630566,1.507989e+22,177.408791,4.384030e+06,127.120003,38.299999,15.620000,265.149994,121.080002,23.510000,3.145908,18,0.0,0.0,53.0,65.1,0.0,0.000000,6743.081021,0.0,1.008414
2018-04-17,8030.570000,4231315,2.837883e+19,4016287.50,388596483,1.648974e+22,194.758219,4.385586e+06,127.750000,39.740002,15.800000,270.190002,121.269997,23.459999,3.145908,18,0.0,0.0,51.9,64.7,0.0,0.262136,7955.891970,0.0,1.012585
2018-04-24,9191.820000,4433517,3.016500e+19,4002462.50,390723140,1.648974e+22,226.703255,4.374746e+06,126.230003,40.009998,15.760000,262.980011,117.949997,23.840000,3.145908,18,0.0,0.0,50.2,70.7,0.0,0.876923,8462.947703,0.0,1.086610
2018-05-01,8949.876240,4403620,2.823941e+19,3989900.00,392808416,1.727461e+22,360.261563,4.358523e+06,123.709999,40.230000,15.240000,264.980011,118.410004,24.290001,3.192445,14,0.0,0.0,60.2,67.7,0.0,-0.692623,7655.385737,0.0,1.170497
2018-05-08,9424.991240,4526336,2.902515e+19,3977200.00,394914118,1.727461e+22,251.320580,4.357910e+06,124.589996,41.029999,15.510000,266.920013,118.690002,24.469999,3.192445,14,0.0,0.0,51.7,68.7,0.0,1.820000,7968.788517,0.0,1.182961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-09-15,10829.505307,6709189,1.367278e+20,2509418.75,706939486,7.450049e+22,554.555482,7.056781e+06,183.449997,17.309999,25.230000,340.170013,164.070007,25.139999,1.253407,12,0.0,0.0,30.0,55.7,-1.0,0.263794,10279.724209,0.0,1.054677
2020-09-22,10531.164563,6773880,1.364107e+20,2502775.00,710296832,8.295582e+22,434.075983,7.089063e+06,178.649994,17.080000,22.790001,330.299988,164.270004,25.379999,1.253407,12,0.0,0.0,27.1,53.2,0.0,-0.085239,10562.272600,0.0,0.997141
2020-09-29,10764.284371,6725390,1.399478e+20,2496400.00,713661677,8.295582e+22,474.643046,7.061419e+06,178.190002,16.010000,22.500000,332.369995,164.839996,25.350000,1.253407,12,0.0,0.0,27.4,64.6,0.0,0.239091,10712.824141,0.0,1.004827
2020-10-06,10589.626394,6892654,1.363034e+20,2490181.25,717016295,8.288465e+22,658.892114,7.072003e+06,177.300003,15.870000,21.730000,334.929993,160.429993,25.330000,1.286484,13,0.0,0.0,28.7,64.6,0.0,-0.009171,10858.481640,0.0,0.975261


In [None]:
# Save the combined dataset (original columns plus level/trend/season) as CSV.
output_path = 'merged_exponentialsmoothing.csv'
merged.to_csv(output_path)