# Description: This notebook extends our previous notebook by adding Holt-Winters exponential smoothing components (level, trend, and seasonality) as inputs to the daily BTC dataset.

In [1]:
#statsmodels.tsa.holtwinters.ExponentialSmoothing
#https://www.statsmodels.org/stable/examples/notebooks/generated/exponential_smoothing.html
#https://www.statsmodels.org/dev/generated/statsmodels.tsa.holtwinters.ExponentialSmoothing.html
# grid search holt winter's exponential smoothing
#this model can be applied to all cases (simple, trend, seasonality, any combination)
#by performing an exhaustive search of all the parameters and optimizing the error
#This code adapted from Jason Brownlee's Deep Learning for Time Series Forecasting
from datetime import datetime
from math import sqrt
from multiprocessing import cpu_count
from joblib import Parallel
from joblib import delayed
from warnings import catch_warnings
from warnings import filterwarnings
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_squared_error
from pandas import read_csv
from numpy import array
import pandas as pd
import numpy as np

  import pandas.util.testing as tm


In [2]:
# one-step Holt Winter’s Exponential Smoothing forecast
def exp_smoothing_forecast(history, config):
	"""Fit an ExponentialSmoothing model on ``history`` and forecast one step ahead.

	Args:
		history: Sequence of past observations; converted to a NumPy array.
		config: Six-item sequence ``(trend, damped, seasonal, seasonal_periods,
			use_boxcox, remove_bias)``. The first four are passed to the model
			constructor, the last two to ``fit``.

	Returns:
		The first element of the prediction for the index right after the
		last observation in ``history``.
	"""
	trend, damped, seasonal, periods, use_boxcox, remove_bias = config
	observations = array(history)
	# Build the model and fit it in one chain; the parameters are optimized by fit().
	fitted = ExponentialSmoothing(
		observations,
		trend=trend,
		damped=damped,
		seasonal=seasonal,
		seasonal_periods=periods,
	).fit(optimized=True, use_boxcox=use_boxcox, remove_bias=remove_bias)
	# Start and end are both len(observations), i.e. the first out-of-sample step.
	next_step = len(observations)
	forecast = fitted.predict(next_step, next_step)
	return forecast[0]

# root mean squared error or rmse
def measure_rmse(actual, predicted):
	"""Return the root mean squared error between ``actual`` and ``predicted``.

	Args:
		actual: Observed values.
		predicted: Forecast values, aligned with ``actual``.

	Returns:
		The square root of ``mean_squared_error(actual, predicted)``.
	"""
	mse = mean_squared_error(actual, predicted)
	return sqrt(mse)

# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
	"""Split a univariate series into a leading train part and a trailing test part.

	Args:
		data: Sliceable sequence that supports ``len()`` (list, NumPy array, ...).
		n_test: Number of trailing observations to put in the test part.

	Returns:
		Tuple ``(train, test)`` where ``test`` holds the last ``n_test`` items
		and ``train`` everything before them. If ``n_test`` is at least
		``len(data)``, ``train`` is empty and ``test`` is all of ``data``.

	Raises:
		ValueError: If ``n_test`` is negative.
	"""
	if n_test < 0:
		raise ValueError('n_test must be non-negative, got %r' % (n_test,))
	# Slice at an explicit index rather than with -n_test: data[:-0] is empty
	# and data[-0:] is everything, which would swap train and test for n_test == 0.
	split = max(len(data) - n_test, 0)
	return data[:split], data[split:]

# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, cfg):
	"""Score one configuration with walk-forward validation.

	The last ``n_test`` observations are held out. For each of them a
	one-step forecast is made from the history seen so far, after which the
	true observation is appended to the history.

	Args:
		data: Univariate series to split into train and test parts.
		n_test: Number of trailing observations to forecast.
		cfg: Model configuration passed through to ``exp_smoothing_forecast``.

	Returns:
		The RMSE between the held-out observations and their forecasts.
	"""
	train, test = train_test_split(data, n_test)
	# The history starts as a copy of the training part and grows by one
	# actual observation after every forecast.
	history = list(train)
	predictions = []
	for observed in test:
		predictions.append(exp_smoothing_forecast(history, cfg))
		history.append(observed)
	return measure_rmse(test, predictions)

# score a model, return None on failure
def score_model(data, n_test, cfg, debug=False):
	"""Score a single configuration, returning ``None`` as the score on failure.

	Args:
		data: Univariate series handed to ``walk_forward_validation``.
		n_test: Number of trailing observations used for validation.
		cfg: Model configuration; its ``str()`` form is used as the result key.
		debug: When true, warnings are shown and any exception propagates.

	Returns:
		Tuple ``(key, result)`` where ``key`` is ``str(cfg)`` and ``result`` is
		the validation error, or ``None`` if validation raised an exception
		(non-debug mode only).
	"""
	key = str(cfg)
	result = None
	if debug:
		# Show all warnings and fail on exception when debugging.
		result = walk_forward_validation(data, n_test, cfg)
	else:
		try:
			# Warnings are suppressed while grid searching because they are too noisy.
			with catch_warnings():
				filterwarnings("ignore")
				result = walk_forward_validation(data, n_test, cfg)
		except Exception:
			# One failure during validation suggests an unstable config, so it is
			# scored as None. Only Exception is caught so that KeyboardInterrupt
			# and SystemExit still stop a long-running search.
			result = None
	# Report only configurations that produced a score.
	if result is not None:
		print(' > Model[%s] %.3f' % (key, result))
	return (key, result)

# grid search configs
def grid_search(data, cfg_list, n_test, parallel=False):
	"""Score every configuration and return the successful ones, best first.

	Args:
		data: Univariate series passed to ``score_model``.
		cfg_list: Iterable of model configurations to evaluate.
		n_test: Number of trailing observations used for validation.
		parallel: When true, configurations are scored through a joblib
			``Parallel`` executor with one job per CPU; otherwise sequentially.

	Returns:
		List of ``(key, error)`` tuples sorted by ascending error.
		Configurations whose score is ``None`` are dropped.
	"""
	if parallel:
		# Execute the configs in parallel; the task generator is consumed by the executor.
		executor = Parallel(n_jobs=cpu_count(), backend='multiprocessing')
		tasks = (delayed(score_model)(data, n_test, cfg) for cfg in cfg_list)
		scores = executor(tasks)
	else:
		scores = [score_model(data, n_test, cfg) for cfg in cfg_list]
	# Drop failed configs. An identity test is used because None is a singleton
	# and `!= None` would invoke the score's own comparison operator.
	scores = [r for r in scores if r[1] is not None]
	# Sort configs by error, ascending.
	scores.sort(key=lambda tup: tup[1])
	return scores

# create a set of exponential smoothing configs to try
def exp_smoothing_configs(seasonal=[None]):
	"""Build every exponential smoothing configuration to try.

	Args:
		seasonal: Iterable of candidate seasonal period values. The default
			list is only read, never modified.

	Returns:
		List of ``[trend, damped, seasonal, seasonal_periods, use_boxcox,
		remove_bias]`` lists covering the full cross product of the options,
		with the trend option varying slowest and remove_bias fastest.
	"""
	trend_options = ['add', 'mul', None]
	damped_options = [True, False]
	seasonal_options = ['add', 'mul', None]
	flag_options = [True, False]
	# One flat comprehension; the clause order gives the nesting order.
	return [
		[trend, damped, season, period, boxcox, bias]
		for trend in trend_options
		for damped in damped_options
		for season in seasonal_options
		for period in seasonal
		for boxcox in flag_options
		for bias in flag_options
	]

if __name__ == '__main__':
    # Read the daily BTC dataset (first column becomes the index) and keep
    # only the closing price, as a single-column frame.
    series = read_csv('btc_dataset.csv', header=0, index_col=0)[['Closing Price (USD)']]
    data = series.values
    # Number of trailing observations held out for walk-forward validation.
    n_test = 12
    # Every configuration to evaluate, for the candidate seasonal periods.
    cfg_list = exp_smoothing_configs(seasonal=[0, 6, 12])
    # Score all configurations on the closing price column; results come back
    # sorted by ascending error.
    scores = grid_search(data[:, 0], cfg_list, n_test)
    print('done')
    # List the top 3 configs.
    for cfg, error in scores[:3]:
        print(cfg, error)

 > Model[['add', True, 'add', 6, True, True]] 125.261
 > Model[['add', True, 'add', 6, True, False]] 125.592
 > Model[['add', True, 'add', 6, False, True]] 123.900
 > Model[['add', True, 'add', 6, False, False]] 124.692
 > Model[['add', True, 'add', 12, False, True]] 127.059
 > Model[['add', True, 'add', 12, False, False]] 127.831
 > Model[['add', True, 'mul', 6, True, True]] 127.626
 > Model[['add', True, 'mul', 6, True, False]] 128.251
 > Model[['add', True, 'mul', 6, False, True]] 132.612
 > Model[['add', True, 'mul', 6, False, False]] 133.307
 > Model[['add', True, 'mul', 12, True, True]] 124.426
 > Model[['add', True, 'mul', 12, True, False]] 125.282
 > Model[['add', True, 'mul', 12, False, True]] 136.710
 > Model[['add', True, 'mul', 12, False, False]] 137.205
 > Model[['add', True, None, 0, True, True]] 122.353
 > Model[['add', True, None, 0, True, False]] 122.693
 > Model[['add', True, None, 0, False, True]] 123.384
 > Model[['add', True, None, 0, False, False]] 124.173
 > Mode

In [None]:
# So we see that the best configuration that minimizes RMSE (among those with use_boxcox set to False) is:
# [None, False, 'mul', 6, False, True] 110.84070442119378

# For reference:
# t,d,s,p,b,r = config
# t = trend
# d = damped
# s = seasonal
# p = seasonal periods
# b = use_boxcox
# r = remove_bias

In [3]:
# Fit a new ExponentialSmoothing model on the closing price series using the best
# configuration found above: default trend and damping, multiplicative seasonality
# with a period of 6, no Box-Cox transform, bias removed.
btc_fit = ExponentialSmoothing(
    series,
    seasonal='mul',
    seasonal_periods=6,
).fit(optimized=True, use_boxcox=False, remove_bias=True)
# Put the observed series next to the fitted level, slope, seasonal factor and
# fitted values, one column each, on the original date index.
# NOTE(review): `slope` may be exposed as `trend` in newer statsmodels releases;
# confirm against the installed version.
btc_model_internals = pd.DataFrame(
    np.c_[series, btc_fit.level, btc_fit.slope, btc_fit.season, btc_fit.fittedvalues],
    index=series.index,
    columns=[r'$y_t$', r'$l_t$', r'$b_t$', r'$s_t$', r'$\hat{y}_t$'],
)
btc_model_internals



Unnamed: 0_level_0,$y_t$,$l_t$,$b_t$,$s_t$,$\hat{y}_t$
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-10-01,123.654990,3998.580221,0.0,0.030926,124.288339
2013-10-02,125.455000,4032.630417,0.0,0.031110,128.553445
2013-10-03,108.584830,3652.392974,0.0,0.029746,124.802711
2013-10-04,118.674660,3847.244427,0.0,0.030851,116.564097
2013-10-05,121.338660,3885.795528,0.0,0.031226,124.284748
...,...,...,...,...,...
2020-10-05,10756.404585,327744.157259,0.0,0.032820,10589.154336
2020-10-06,10589.626394,324624.703568,0.0,0.032621,10701.313299
2020-10-07,10645.754787,326518.854976,0.0,0.032604,10584.804411
2020-10-08,10897.595432,333788.532038,0.0,0.032649,10651.586718


In [4]:
# The fitted model decomposes the time series into level, trend and seasonality
# components. These three components are the extra inputs we want for our LSTM,
# so collect them into one frame on the original date index.
btc_model_decomp = pd.DataFrame(
    np.c_[btc_fit.level, btc_fit.slope, btc_fit.season],
    index=series.index,
    columns=['level', 'trend', 'season'],
)
btc_model_decomp

Unnamed: 0_level_0,level,trend,season
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-10-01,3998.580221,0.0,0.030926
2013-10-02,4032.630417,0.0,0.031110
2013-10-03,3652.392974,0.0,0.029746
2013-10-04,3847.244427,0.0,0.030851
2013-10-05,3885.795528,0.0,0.031226
...,...,...,...
2020-10-05,327744.157259,0.0,0.032820
2020-10-06,324624.703568,0.0,0.032621
2020-10-07,326518.854976,0.0,0.032604
2020-10-08,333788.532038,0.0,0.032649


In [6]:
# Let's combine these 3 columns with our original dataset (btc_dataset.csv).
original_data = pd.read_csv('btc_dataset.csv')
# Outer-join on the 'Date' column, then restore 'Date' as the index.
# reset_index() returns a new frame, so btc_model_decomp is left untouched and
# this cell can be re-run without adding a stray 'index' column to the result.
merged = (
    original_data
    .merge(btc_model_decomp.reset_index(), how='outer', on='Date')
    .set_index('Date')
)

merged

Unnamed: 0_level_0,Closing Price (USD),active_addresses,hash_rate,btc_left,total_addresses,difficulty,total_fees,fed_assets,GLD,IYE,SLV,SPY,TLT,UUP,NYFed_inflation,Google_popularity,level,trend,season
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2013-10-01,123.654990,89218,1309351515956620,9220600.00,18656301,639173596179762000,39.139746,3.747387e+06,124.589996,47.430000,20.410000,169.339996,105.800003,21.610001,1.738484,3,3998.580221,0.0,0.030926
2013-10-02,125.455000,105303,1307159450402430,9215825.00,18708039,639173596179762000,39.923033,3.747387e+06,127.059998,47.540001,20.920000,169.179993,105.959999,21.530001,1.738484,3,4032.630417,0.0,0.031110
2013-10-03,108.584830,89993,1452700445873280,9210850.00,18751619,639173596179762000,36.602946,3.748998e+06,127.180000,47.099998,20.920000,167.619995,105.790001,21.510000,1.738484,3,3652.392974,0.0,0.029746
2013-10-04,118.674660,77370,1283409529949880,9206350.00,18791964,639173596179762000,24.772795,3.750609e+06,126.529999,47.520000,20.930000,168.889999,105.709999,21.610001,1.738484,3,3847.244427,0.0,0.030851
2013-10-05,121.338660,64961,1602048841926960,9200875.00,18823649,639173596179762000,26.062425,3.752220e+06,126.529999,47.520000,20.930000,168.889999,105.709999,21.610001,1.738484,3,3885.795528,0.0,0.031226
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-10-05,10756.404585,945304,134263433083212000000,2490993.75,716519058,8.28846533403546e+22,98.282630,7.069358e+06,179.410004,16.110001,22.620001,339.760010,159.570007,25.260000,1.286484,13,327744.157259,0.0,0.032820
2020-10-06,10589.626394,978432,124011704101259000000,2490181.25,717016295,8.28846533403546e+22,113.311399,7.072003e+06,177.300003,15.870000,21.730000,334.929993,160.429993,25.330000,1.286484,13,324624.703568,0.0,0.032621
2020-10-07,10645.754787,1065070,147533744982949000000,2489237.50,717562057,8.28846533403546e+22,111.173305,7.074649e+06,177.220001,16.100000,22.090000,340.760010,159.259995,25.299999,1.286484,13,326518.854976,0.0,0.032604
2020-10-08,10897.595432,1039051,152854080428230000000,2488225.00,718072551,8.28846533403546e+22,103.139640,7.074649e+06,177.850006,16.700001,22.180000,343.779999,160.119995,25.290001,1.286484,13,333788.532038,0.0,0.032649


In [7]:
# Write the merged dataset (original columns plus level, trend and season) to a new CSV file
merged.to_csv('btc_dataset_exponentialsmoothing.csv')