# How to Develop Simple Methods for Univariate Forecasting

In [1]:
# one-step naive forecast
def naive_forecast(history, n):
    return history[-n]

In [2]:
# define dataset
data = [10,20,30,40,50,60,70,80,90,100]
print(data)
# test naive forecast
for i in range(1, len(data)+1):
    print(naive_forecast(data,i))


[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
100
90
80
70
60
50
40
30
20
10


In [6]:
from numpy import mean, median
# one-step average forecast
def average_forecast(history: list, config: tuple):
    n, avg_type = config
    # mean of last n values
    if avg_type is 'mean':
        return mean(history[-n:])
    # median of last n values
    return median(history[-n:])

  if avg_type is 'mean':


In [7]:
# define dataset
data = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
print(data)
# test naive forecast
for i in range(i, len(data)+1):
    print(average_forecast(data, (i, 'mean')))

[10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
55.0


We can update the function to support averaging over seasonal data, respecting the seasonal offset. An offset argument can be added to the function that when not set to 1 will determine the number of prior observations backwards to count before collecting values from which to include the average. For example, if $\footnotesize n = 1 $, and $\footnotesize offset = 3$, then the average is calculated from the single value at $ \footnotesize n \times offset$  or $ \footnotesize 1 \times 3 = -3 $. If $\footnotesize n = 2$ and $\footnotesize offset = 3$, then the average is calculated from the values at $\footnotesize 1 \times 3$ or $\footnotesize-3$ and $\footnotesize 2 \times 3$ or $\footnotesize 6$.

We can also add some protection to raise an exception when seasonal configuration (n x offset) extends beyond the end of the historical observationts. 

The updated function is listed below

In [2]:
# example of an average forecast for seasonal data
from numpy import mean
from numpy import median

# one-step average foracast
def average_forecast(history, config):
    n, offset, avg_type = config
    values = list()
    if offset == 1:
        values = history[-n:]
    else:
        if n*offset > len(history):
            raise Exception('Config beyond end of data: %d %d' %(n, offset))
        # try to collect n values using offset
        for i in range(1, n+1):
            ix = i*offset
            values.append(history[-ix])
    # mean of last n values
    if avg_type is 'mean':
        return mean(values)
    # median of last n values
    return median(values)

  if avg_type is 'mean':


In [3]:
# define dataset
data = [10.0, 20.0, 30.0, 10.0, 20.0, 30.0, 10.0, 20.0, 30.0]
print(data)
# test naive forecast
for i in [1, 2, 3]:
    print(average_forecast(data, (i, 3, 'mean')))

[10.0, 20.0, 30.0, 10.0, 20.0, 30.0, 10.0, 20.0, 30.0]
10.0
10.0
10.0


It is possible to combine both the naive and the average forecast strategies together into the same function. There is a little overlap between the methods, specifically the n-offset into the history that is used to either persist values or determine the number of values to average.

It is helpful to have both strategies supported by one function so that we can test a suite of conffigurations for both strategies at once as part of a broader grid search of simple models. The simple forecast() function below combines both strategies into a single function.

In [5]:
# one-step simple forecast
def simple_forecast(history, config):
    n, offset, avg_type = config
    # persist value, ignore other config
    if avg_type == 'persist':
        return history[-n]
    # collect values to average
    values = list()
    if offset == 1:
        values = history[-n:]
    else:
        # skip bad configs
        if n*offset > len(history):
            raise Exception('Config beyond end of data: %d %d' % (n,offset))
        # try and collect n values using offset
        for i in range(1, n+1):
            ix = i * offset
        values.append(history[-ix])
    # check if we can average
    if len(values) < 2:
        raise Exception('Cannot calculate average')
    # mean of last n values
    if avg_type == 'mean':
        return mean(values)
    # median of last n values
    return median(values)

Next, we need to build up some functions for fitting and evaluating a model repeatedly via walk-forward validation, including splitting a dataset into train and test sets and evaluating one-step forecasts. We can split a list or NumPy array of data using a slice given a specfied size of the split, e.g. the number of time steps to use from the data in the test set. The train test split() function below implements this for a provided dataset and a specfied number of time steps to use in the test set.

In [6]:
# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    return data[:-n_test], data[-n_test:]

In [7]:
from math import sqrt
from sklearn.metrics import mean_squared_error
# root mean squared error or rmse
def measure_rmse(actual, predicted):
    return sqrt(mean_squared_error(actual, predicted))

In [8]:
# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, cfg):
    predictions = list()
    # split dataset
    train, test = train_test_split(data, n_test)
    # seed history with training dataset
    history = [x for x in train]
    # step over each time step in the test set
    for i in range(len(test)):
        # fit model and make forecast for history
        yhat = simple_forecast(history, cfg)
        # store forecast in list of predictions
        predictions.append(yhat)
        # add actual observation to history for the next loop
        history.append(test[i])
    # estimate prediction error
    error = measure_rmse(test, predictions)
    return error

In [11]:
from warnings import catch_warnings
from warnings import filterwarnings
# score a model, return None on failure
def score_model(data, n_test, cfg, debug=False):
    result = None
    # convert config to a key
    key = str(cfg)
    # show all warnings and fail on exception if debugging
    if debug:
        result = walk_forward_validation(data, n_test, cfg)
    else:
    # one failure during model validation suggests an unstable config
        try:
            # never show warnings when grid searching, too noisy
            with catch_warnings():
                filterwarnings("ignore")
                result = walk_forward_validation(data, n_test, cfg)
        except:
            error = None
    # check for an interesting result
    if result is not None:
        print(' > Model[%s] %.3f' % (key, result))
    return (key, result)

In [12]:
from joblib import Parallel
from multiprocessing import cpu_count
# define executor
executor = Parallel(n_jobs=cpu_count(), backend='multiprocessing')