# How to Develop ETS Models for Univariate Forecasting

The following is a good reference for Holt-Winters Smoothing:
https://medium.com/analytics-vidhya/a-thorough-introduction-to-holt-winters-forecasting-c21810b8c0e6

In [1]:
# grid search Holt-Winters exponential smoothing
from math import sqrt
from multiprocessing import cpu_count
from joblib import Parallel
from joblib import delayed
from warnings import catch_warnings
from warnings import filterwarnings
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_squared_error
from numpy import array
from pandas import read_csv

In [2]:
# one-step Holt Winters Exponential Smoothing forecast
def exp_smoothing_forecast(history, config):
    """Fit a Holt-Winters model on ``history`` and return a one-step forecast.

    ``config`` is a six-item sequence unpacked as ``(trend, damped_trend,
    seasonal, seasonal_periods, use_boxcox, remove_bias)``. The first five
    are passed to ``ExponentialSmoothing``; the last is passed to ``fit``.
    """
    trend, damped, seasonal, periods, boxcox, remove_bias = config
    # the model is built from a numpy array of the observations
    series = array(history)
    n_obs = len(series)
    fitted = ExponentialSmoothing(
        series,
        trend=trend,
        damped_trend=damped,
        seasonal=seasonal,
        seasonal_periods=periods,
        use_boxcox=boxcox,
    ).fit(optimized=True, remove_bias=remove_bias)
    # start == end == n_obs, i.e. the single step right after the history
    forecast = fitted.predict(n_obs, n_obs)
    return forecast[0]

In [3]:
# root mean squared error or rmse
def measure_rmse(actual, predicted):
    """Return the root mean squared error between ``actual`` and ``predicted``."""
    mse = mean_squared_error(actual, predicted)
    return sqrt(mse)

In [4]:
# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    """Split a univariate series into ``(train, test)``.

    The last ``n_test`` observations form the test set and everything before
    them forms the training set. ``n_test == 0`` yields the whole series as
    training data and an empty test set. If ``n_test`` exceeds ``len(data)``
    the training set is empty and the test set is the whole series.

    Raises:
        ValueError: if ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError('n_test must be non-negative, got %r' % (n_test,))
    # an explicit split index avoids the ``data[:-0]`` pitfall, which would
    # return an empty training set when n_test is 0
    split = max(len(data) - n_test, 0)
    return data[:split], data[split:]

In [5]:
# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, cfg):
    """Return the RMSE of one-step forecasts over the last ``n_test`` points.

    The series is split with ``train_test_split``. For each test observation
    a forecast is made from everything seen so far via
    ``exp_smoothing_forecast``; the true value is then appended to the
    history before the next step.
    """
    train, test = train_test_split(data, n_test)
    # the growing history starts out as the training portion
    observed = list(train)
    forecasts = []
    for actual in test:
        # forecast first, then reveal the real value for the next iteration
        forecasts.append(exp_smoothing_forecast(observed, cfg))
        observed.append(actual)
    return measure_rmse(test, forecasts)

In [6]:
# score a model, return None on failure
def score_model(data, n_test, cfg, debug=False):
    """Score one configuration with walk-forward validation.

    Args:
        data: the univariate series to evaluate on.
        n_test: number of trailing observations held out for testing.
        cfg: model configuration; its ``str()`` is used as the result key.
        debug: when True, warnings are shown and any exception propagates.
            When False, warnings are silenced and a failure yields ``None``.

    Returns:
        A ``(key, result)`` tuple where ``result`` is the validation error,
        or ``None`` if validation failed in non-debug mode.
    """
    key = str(cfg)
    result = None
    if debug:
        # show all warnings and fail on exception if debugging
        result = walk_forward_validation(data, n_test, cfg)
    else:
        # one failure during model validation suggests an unstable config
        try:
            # never show warnings when grid searching, too noisy
            with catch_warnings():
                filterwarnings("ignore")
                result = walk_forward_validation(data, n_test, cfg)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still stop a long-running grid search
            result = None
    # only report configurations that produced a score
    if result is not None:
        print(' > Model[%s] %.3f' % (key, result))
    return (key, result)

In [7]:
# grid search configs
def grid_search(data, cfg_list, n_test, parallel=True):
    """Score every configuration in ``cfg_list`` and rank the results.

    Args:
        data: the univariate series to evaluate on.
        cfg_list: iterable of configurations passed to ``score_model``.
        n_test: number of trailing observations held out for testing.
        parallel: when True, configurations are scored through a joblib
            ``Parallel`` executor with one job per CPU; otherwise they are
            scored sequentially in this process.

    Returns:
        A list of ``(key, error)`` tuples sorted by ascending error.
        Configurations whose score is ``None`` are dropped.
    """
    if parallel:
        # execute configs in parallel
        executor = Parallel(n_jobs=cpu_count(), backend='multiprocessing')
        tasks = (delayed(score_model)(data, n_test, cfg) for cfg in cfg_list)
        scores = executor(tasks)
    else:
        scores = [score_model(data, n_test, cfg) for cfg in cfg_list]
    # remove empty results (identity check: None is a singleton)
    scores = [r for r in scores if r[1] is not None]
    # sort configs by error, asc
    scores.sort(key=lambda tup: tup[1])
    return scores

In [8]:
# create a set of exponential smoothing configs to try
def exp_smoothing_configs(seasonal=(None,)):
    """Build the full grid of exponential smoothing configurations.

    Args:
        seasonal: candidate values for the seasonal period. Defaults to a
            single ``None`` entry. An immutable tuple is used as the default
            so it cannot be shared and mutated across calls.

    Returns:
        A list of ``[trend, damped, seasonal, seasonal_period, use_boxcox,
        remove_bias]`` lists covering every combination of the candidate
        values, with the rightmost field varying fastest.
    """
    from itertools import product

    # candidate values for each field, in config order
    t_params = ['add', 'mul', None]
    d_params = [True, False]
    s_params = ['add', 'mul', None]
    p_params = seasonal
    b_params = [True, False]
    r_params = [True, False]
    # product() iterates in the same order as the equivalent nested loops
    grid = product(t_params, d_params, s_params, p_params, b_params, r_params)
    return [list(combo) for combo in grid]

In [9]:
if __name__ == '__main__':
    # toy series: 10.0, 20.0, ..., 100.0
    data = [float(value) for value in range(10, 101, 10)]
    print(data)
    # candidate model configurations (default seasonal setting)
    cfg_list = exp_smoothing_configs()
    # hold out the last 4 observations for walk-forward validation
    n_test = 4
    # score every configuration sequentially
    scores = grid_search(data, cfg_list, n_test, parallel=False)
    print('done')
    # report the three lowest-error configurations
    for cfg, error in scores[:3]:
        print(cfg, error)

[10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
 > Model[['add', True, None, None, True, True]] 0.953


 > Model[['add', True, None, None, True, False]] 0.964
 > Model[['add', True, None, None, False, True]] 0.115
 > Model[['add', True, None, None, False, False]] 0.121
 > Model[['add', False, None, None, True, True]] 2.416
 > Model[['add', False, None, None, True, False]] 2.415
 > Model[['add', False, None, None, False, True]] 0.000
 > Model[['add', False, None, None, False, False]] 0.000
 > Model[['mul', True, None, None, True, True]] 1.755
 > Model[['mul', True, None, None, True, False]] 0.808
 > Model[['mul', True, None, None, False, True]] 1.612
 > Model[['mul', True, None, None, False, False]] 0.900
 > Model[['mul', False, None, None, True, True]] 1.941
 > Model[['mul', False, None, None, True, False]] 1.680
 > Model[['mul', False, None, None, False, True]] 1.584
 > Model[['mul', False, None, None, False, False]] 1.611
 > Model[[None, False, None, None, True, True]] 1.380
 > Model[[None, False, None, None, True, False]] 10.000
 > Model[[None, False, None, None, False, True]] 2.563
 

## Case Study 1: No Trend or Seasonality

In [10]:
if __name__ == '__main__':
    # read the daily female births CSV; first column is used as the index
    series = read_csv(
        '../Data/Chapter 12/daily-total-female-births.csv',
        header=0,
        index_col=0,
    )
    data = series.values
    # candidate model configurations (default seasonal setting)
    cfg_list = exp_smoothing_configs()
    # hold out the last 165 observations for walk-forward validation
    n_test = 165
    # score every configuration sequentially on the first value column
    scores = grid_search(data[:, 0], cfg_list, n_test, parallel=False)
    print('done')
    # report the three lowest-error configurations
    for cfg, error in scores[:3]:
        print(cfg, error)

 > Model[['add', True, None, None, True, True]] 7.132
 > Model[['add', True, None, None, True, False]] 7.184
 > Model[['add', True, None, None, False, True]] 7.130
 > Model[['add', True, None, None, False, False]] 7.137
 > Model[['add', False, None, None, True, True]] 7.129
 > Model[['add', False, None, None, True, False]] 7.129
 > Model[['add', False, None, None, False, True]] 7.123
 > Model[['add', False, None, None, False, False]] 7.123
 > Model[['mul', True, None, None, True, True]] 17.207
 > Model[['mul', True, None, None, True, False]] 83.072
 > Model[['mul', True, None, None, False, True]] 8.364
 > Model[['mul', True, None, None, False, False]] 25.775
 > Model[['mul', False, None, None, True, True]] 7.150
 > Model[['mul', False, None, None, True, False]] 7.139
 > Model[['mul', False, None, None, False, True]] 7.166
 > Model[['mul', False, None, None, False, False]] 7.159
 > Model[[None, False, None, None, True, True]] 7.173
 > Model[[None, False, None, None, True, False]] 7.218


## Case Study 2: Trend

In [11]:
if __name__ == '__main__':
    # read the shampoo CSV; first column is used as the index
    series = read_csv(
        '../Data/Chapter 12/shampoo.csv',
        header=0,
        index_col=0,
    )
    data = series.values
    # candidate model configurations (default seasonal setting)
    cfg_list = exp_smoothing_configs()
    # hold out the last 12 observations for walk-forward validation
    n_test = 12
    # score every configuration sequentially on the first value column
    scores = grid_search(data[:, 0], cfg_list, n_test, parallel=False)
    print('done')
    # report the three lowest-error configurations
    for cfg, error in scores[:3]:
        print(cfg, error)

 > Model[['add', True, None, None, True, True]] 86.892
 > Model[['add', True, None, None, True, False]] 89.946
 > Model[['add', True, None, None, False, True]] 87.748
 > Model[['add', True, None, None, False, False]] 90.545
 > Model[['add', False, None, None, True, True]] 101.065
 > Model[['add', False, None, None, True, False]] 101.210
 > Model[['add', False, None, None, False, True]] 88.512
 > Model[['add', False, None, None, False, False]] 89.824
 > Model[['mul', True, None, None, True, True]] 180.935
 > Model[['mul', True, None, None, True, False]] 273.196
 > Model[['mul', True, None, None, False, True]] 88.738
 > Model[['mul', True, None, None, False, False]] 87.934
 > Model[['mul', False, None, None, True, True]] 94.034
 > Model[['mul', False, None, None, True, False]] 95.790
 > Model[['mul', False, None, None, False, True]] 95.073
 > Model[['mul', False, None, None, False, False]] 93.361
 > Model[[None, False, None, None, True, True]] 96.682
 > Model[[None, False, None, None, Tr

## Case Study 3: Seasonality

In [12]:
if __name__ == '__main__':
    # read the monthly mean temperature CSV; first column is the index
    series = read_csv(
        '../Data/Chapter 12/monthly-mean-temp.csv',
        header=0,
        index_col=0,
    )
    data = series.values
    # keep only the final 5 * 12 = 60 rows (5 years)
    data = data[-60:]
    # candidate configurations, trying seasonal periods 0 and 12
    cfg_list = exp_smoothing_configs(seasonal=[0, 12])
    # hold out the last 12 observations for walk-forward validation
    n_test = 12
    # score every configuration sequentially on the first value column
    scores = grid_search(data[:, 0], cfg_list, n_test, parallel=False)
    print('done')
    # report the three lowest-error configurations
    for cfg, error in scores[:3]:
        print(cfg, error)

 > Model[['add', True, 'add', 12, True, True]] 1.512
 > Model[['add', True, 'add', 12, True, False]] 1.515
 > Model[['add', True, 'add', 12, False, True]] 1.508
 > Model[['add', True, 'add', 12, False, False]] 1.508
 > Model[['add', True, 'mul', 12, True, True]] 1.504
 > Model[['add', True, 'mul', 12, True, False]] 1.507
 > Model[['add', True, 'mul', 12, False, True]] 1.505
 > Model[['add', True, 'mul', 12, False, False]] 1.505
 > Model[['add', True, None, 0, True, True]] 4.677
 > Model[['add', True, None, 0, True, False]] 4.602
 > Model[['add', True, None, 0, False, True]] 4.824
 > Model[['add', True, None, 0, False, False]] 4.776
 > Model[['add', True, None, 12, True, True]] 4.677
 > Model[['add', True, None, 12, True, False]] 4.602
 > Model[['add', True, None, 12, False, True]] 4.824
 > Model[['add', True, None, 12, False, False]] 4.776
 > Model[['add', False, 'add', 12, True, True]] 1.643
 > Model[['add', False, 'add', 12, True, False]] 1.633
 > Model[['add', False, 'add', 12, Fals

## Case Study 4: Random Walk; Stock Price

In [13]:
if __name__ == '__main__':
    # read the GLO stock price CSV; first column is used as the index
    series = read_csv(
        '../Data/PH-historical-stock-price-data-csv/GLO.csv',
        header=0,
        index_col=0,
    )
    # work with the last 200 values of the 'Close' column
    data = series['Close'].tail(200).values
    # candidate model configurations (default seasonal setting)
    cfg_list = exp_smoothing_configs()
    # hold out the last 25 observations for walk-forward validation
    n_test = 25
    # score every configuration sequentially
    scores = grid_search(data, cfg_list, n_test, parallel=False)
    print('done')
    # report the three lowest-error configurations
    for cfg, error in scores[:3]:
        print(cfg, error)

 > Model[['add', True, None, None, False, True]] 21.275
 > Model[['add', True, None, None, False, False]] 21.141
 > Model[['add', False, None, None, False, True]] 20.957
 > Model[['add', False, None, None, False, False]] 20.867
 > Model[['mul', True, None, None, False, True]] 21.376
 > Model[['mul', True, None, None, False, False]] 21.990
 > Model[['mul', False, None, None, False, True]] 20.784
 > Model[['mul', False, None, None, False, False]] 20.819
 > Model[[None, False, None, None, False, True]] 20.939
 > Model[[None, False, None, None, False, False]] 21.248
done
['mul', False, None, None, False, True] 20.783740583139885
['mul', False, None, None, False, False] 20.81948293884651
['add', False, None, None, False, False] 20.867142899363245
