## Multi-Step Forecasting with Nonlinear Machine Learning Models

# Load and set up dataframe 

In [None]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from google.colab import drive
# Mount Google Drive at /content/gdrive (uses the google.colab helper, so this needs a Colab runtime).
drive.mount('/content/gdrive')

In [None]:
# Location of the input CSV; left empty here and must be filled in before running.
PATH = ''
df = pd.read_csv(PATH)
# Replace the file's header with stable names: a timestamp plus one column per region.
df.columns = ['timestamp', 'region_1', 'region_2', 'region_3', 'region_4', 'region_5', 'region_6']
# A bare `df.shape` is discarded unless it is the cell's last expression, so print it.
print(df.shape)
df.head(5)

Unnamed: 0,timestamp,region_1,region_2,region_3,region_4,region_5,region_6
0,1.0,154.0,184.0,107.0,119.0,94.0,89.0
1,2.0,322.0,358.0,196.0,236.0,147.0,176.0
2,3.0,469.0,451.0,334.0,375.0,165.0,246.0
3,4.0,574.0,567.0,445.0,460.0,190.0,307.0
4,5.0,608.0,628.0,490.0,533.0,240.0,348.0


# Import nonlinear algorithms from sklearn

In [None]:
# spot check nonlinear algorithms
from numpy import load
from numpy import loadtxt
from numpy import nan
from numpy import isnan
from numpy import count_nonzero
from numpy import unique
from numpy import array
from sklearn.base import clone
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import ExtraTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor

 

# Verify chunks and check for logical errors


In [None]:

# split into standard hours
from numpy import split
from numpy import array
from pandas import read_csv
 
# split a univariate dataset into train/test sets
def split_dataset(data, n_train=1152, n_steps=6):
    """Split a time-ordered series into train/test sets of fixed-length windows.

    The first ``n_train`` rows become the training set and the remaining rows
    the test set. Each part is then cut into consecutive, non-overlapping
    windows of ``n_steps`` rows.

    Args:
        data: Array sliced along its first axis (e.g. ``DataFrame.values``).
        n_train: Number of leading rows assigned to the training set.
        n_steps: Number of rows per window.

    Returns:
        Tuple ``(train, test)`` of arrays whose first two axes are
        (n_windows, n_steps).

    Raises:
        ValueError: If either part is empty or its length is not a multiple
            of ``n_steps``.
    """
    train, test = data[:n_train], data[n_train:]
    for label, part in (('train', train), ('test', test)):
        if len(part) == 0 or len(part) % n_steps:
            raise ValueError(
                '%s set has %d rows; expected a non-zero multiple of %d'
                % (label, len(part), n_steps))
    # Use integer section counts for both parts; split documents an int here,
    # not the float that true division produces.
    train = array(split(train, len(train) // n_steps))
    test = array(split(test, len(test) // n_steps))
    return train, test
 
# load the new file
# Keep only the region_1 column (double brackets keep it a one-column frame).
df2 = df[['region_1']]
train, test = split_dataset(df2.values)
# Sanity-check each partition: print its shape, then its first and last values.
for _part in (train, test):
    print(_part.shape)
    print(_part[0, 0, 0], _part[-1, -1, 0])

(192, 6, 1)
154.0 0.0
(144, 6, 1)
0.0 582.0


# Forecasting (implementation)


**Direct Multi-Step Forecast (30-min formatted)**

In [None]:
from math import sqrt
from numpy import split
from numpy import array
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from matplotlib import pyplot
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import Lars
from sklearn.linear_model import LassoLars
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import SGDRegressor
   
# split a univariate dataset into train/test sets
def split_dataset(data, n_train=1152, n_steps=6):
    """Split a time-ordered series into train/test sets of fixed-length windows.

    The first ``n_train`` rows become the training set and the remaining rows
    the test set. Each part is then cut into consecutive, non-overlapping
    windows of ``n_steps`` rows.

    Args:
        data: Array sliced along its first axis (e.g. ``DataFrame.values``).
        n_train: Number of leading rows assigned to the training set.
        n_steps: Number of rows per window.

    Returns:
        Tuple ``(train, test)`` of arrays whose first two axes are
        (n_windows, n_steps).

    Raises:
        ValueError: If either part is empty or its length is not a multiple
            of ``n_steps``.
    """
    train, test = data[:n_train], data[n_train:]
    for label, part in (('train', train), ('test', test)):
        if len(part) == 0 or len(part) % n_steps:
            raise ValueError(
                '%s set has %d rows; expected a non-zero multiple of %d'
                % (label, len(part), n_steps))
    # Use integer section counts for both parts; split documents an int here,
    # not the float that true division produces.
    train = array(split(train, len(train) // n_steps))
    test = array(split(test, len(test) // n_steps))
    return train, test
 
# evaluate one or more multi-step (windowed) forecasts against expected values
def evaluate_forecasts(actual, predicted):
    """Score multi-step forecasts against the observed values.

    Args:
        actual: 2-D array of observed values, one row per forecast window and
            one column per forecast step.
        predicted: 2-D array of forecasts laid out like ``actual``.

    Returns:
        Tuple ``(score, scores, scores2)`` where ``score`` is the RMSE over
        every element, ``scores`` is a list with the RMSE of each step
        (column) and ``scores2`` is a list with the MAE of each step.

    Raises:
        ValueError: If ``actual`` has no rows or no columns.
    """
    n_windows, n_steps = actual.shape[0], actual.shape[1]
    if n_windows == 0 or n_steps == 0:
        raise ValueError('no forecasts to evaluate')
    scores = list()
    scores2 = list()
    # per-step metrics: one RMSE and one MAE for each column
    for i in range(n_steps):
        rmse = sqrt(mean_squared_error(actual[:, i], predicted[:, i]))
        mae = mean_absolute_error(actual[:, i], predicted[:, i])
        scores.append(rmse)
        scores2.append(mae)
    # Overall RMSE in one array expression instead of a Python double loop.
    # Only the columns present in `actual` are compared, as before.
    squared_error = (actual - predicted[:, :n_steps]) ** 2
    score = sqrt(squared_error.sum() / (n_windows * n_steps))
    return score, scores, scores2
 
# summarize scores
def summarize_scores(name, score, scores, scores2):
    """Print a two-line summary of one model's scores.

    The first line shows ``name``, the overall ``score`` (3 decimals) and the
    values in ``scores`` (4 decimals each); the second line shows ``name`` and
    the values in ``scores2`` (4 decimals each).
    """
    def _join(values):
        # comma-separated, four decimals per value
        return ', '.join('%.4f' % value for value in values)

    # format both series before printing anything
    first_series = _join(scores)
    second_series = _join(scores2)
    print('%s: [%.3f] %s' % (name, score, first_series))
    print('%s: %s' % (name, second_series))
 
# prepare a list of ml models
def get_models(models=None):
    """Build the dictionary of nonlinear regressors to spot-check.

    Args:
        models: Optional dict to add the estimators to. A new dict is created
            when omitted, so separate calls no longer share (and silently
            overwrite) one default dictionary.

    Returns:
        The dict mapping a short model name to an unfitted estimator.
    """
    if models is None:
        models = dict()
    # non-linear models
    models['knn'] = KNeighborsRegressor(n_neighbors=7)
    models['cart'] = DecisionTreeRegressor()
    models['extra'] = ExtraTreeRegressor()
    models['svmr'] = SVR()
    # ensemble models, all built with the same number of estimators
    n_trees = 100
    models['ada'] = AdaBoostRegressor(n_estimators=n_trees)
    models['bag'] = BaggingRegressor(n_estimators=n_trees)
    models['rf'] = RandomForestRegressor(n_estimators=n_trees)
    models['et'] = ExtraTreesRegressor(n_estimators=n_trees)
    models['gbm'] = GradientBoostingRegressor(n_estimators=n_trees)
    print('Defined %d models' % len(models))
    return models
 
# create a feature preparation pipeline for a model
def make_pipeline(model):
    """Wrap ``model`` in a Pipeline that scales the inputs first.

    The steps, in order, are 'standardize' (StandardScaler), 'normalize'
    (MinMaxScaler) and 'model' (the estimator passed in).
    """
    return Pipeline(steps=[
        ('standardize', StandardScaler()),
        ('normalize', MinMaxScaler()),
        ('model', model),
    ])
 
# convert history into inputs and outputs
def to_supervised(history, output_ix):
    """Turn a sequence of windows into supervised (input, target) pairs.

    For each pair of consecutive windows, the input is column 0 of the
    earlier window and the target is the value at row ``output_ix``,
    column 0 of the later window.

    Returns:
        Tuple ``(X, y)`` of arrays with one entry per consecutive pair.
    """
    # pair every window with its successor
    consecutive = list(zip(history, history[1:]))
    inputs = [earlier[:, 0] for earlier, _ in consecutive]
    targets = [later[output_ix, 0] for _, later in consecutive]
    return array(inputs), array(targets)
 
# fit a model and make a forecast
def sklearn_predict(model, history):
    """Forecast the next window with one direct model per output step.

    For each step of the forecast horizon, a fresh pipeline around ``model``
    is fitted on the pairs produced by ``to_supervised(history, step)`` and
    then asked to predict from the most recent window.

    Args:
        model: Estimator handed to ``make_pipeline``; it is refitted once
            per step.
        history: Sequence of observed windows, oldest first; column 0 of
            each window is used. At least two windows are needed to form
            a training pair.

    Returns:
        List with one predicted value per step of the window.
    """
    # The forecast input is the latest observed window. Using the last
    # training input (the window before it) would only re-predict values
    # that are already known, instead of forecasting the next window.
    last_window = array(history[-1])[:, 0]
    # horizon equals the window length rather than a hard-coded 6
    n_steps = len(last_window)
    x_input = last_window.reshape(1, n_steps)
    yhat_sequence = list()
    for i in range(n_steps):
        # training pairs whose target is step i of the following window
        train_x, train_y = to_supervised(history, i)
        pipeline = make_pipeline(model)
        pipeline.fit(train_x, train_y)
        yhat = pipeline.predict(x_input)[0]
        yhat_sequence.append(yhat)
    return yhat_sequence
 
# evaluate a single model
def evaluate_model(model, train, test):
    """Run walk-forward validation of ``model`` over the test windows.

    The history starts as the training windows. For every test window a
    forecast is made from the current history, and the observed window is
    then appended to the history before the next forecast.

    Returns:
        The three values produced by ``evaluate_forecasts`` for column 0 of
        ``test`` against the collected forecasts.
    """
    history = list(train)
    forecasts = list()
    for observed in test:
        # forecast first, then reveal the true window to later iterations
        forecasts.append(sklearn_predict(model, history))
        history.append(observed)
    forecasts = array(forecasts)
    overall, per_step, per_step2 = evaluate_forecasts(test[:, :, 0], forecasts)
    return overall, per_step, per_step2
 

# Driver: evaluate every candidate model on the region_1 series.
# Keep only the region_1 column (double brackets keep it a one-column frame).
df2 = df[['region_1']]
# split the values into windowed train and test sets
train, test = split_dataset(df2.values)
# build the dictionary of models to evaluate (name -> estimator)
models = get_models()
# evaluate each model in turn and print its scores

for name, model in models.items():
	# walk-forward evaluation; returns the overall score and two per-step score lists
	score, scores, scores2 = evaluate_model(model, train, test)
	# print the overall score and both per-step score lists for this model
	summarize_scores(name, score, scores, scores2)
	# plot scores

