In [1]:
pip install scikit-learn

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from matplotlib import pyplot

from google.colab import drive
# mount Google Drive at /content/drive so files stored there can be read by path
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# transform a time series dataset into a supervised learning dataset
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a matrix of lagged windows.

    Every output row is the concatenation of the observations at
    t-n_in, ..., t-1 followed by the observations at t, ..., t+n_out-1,
    so it has ``(n_in + n_out) * n_columns`` values.

    Args:
        data: Anything ``pd.DataFrame`` accepts (list, 1-D or 2-D array,
            Series, DataFrame), one row per time step.
        n_in: Number of lagged time steps placed before time t.
        n_out: Number of time steps, starting at t, placed after the lags.
        dropnan: When True, drop every row that contains a NaN. This removes
            the incomplete windows created by shifting, and also any window
            that covers a NaN already present in ``data``.

    Returns:
        A 2-D ``numpy.ndarray`` holding the framed windows.
    """
    frame = pd.DataFrame(data)
    # lagged inputs, oldest first: t-n_in, ..., t-1
    windows = [frame.shift(lag) for lag in range(n_in, 0, -1)]
    # current and future steps: t, t+1, ..., t+n_out-1
    windows.extend(frame.shift(-lead) for lead in range(n_out))
    # line the shifted copies up side by side
    agg = pd.concat(windows, axis=1)
    if dropnan:
        agg = agg.dropna()
    return agg.values

In [4]:
# split a 2-D dataset into train/test sets, keeping row order
def train_test_split(data, n_test):
    """Split ``data`` so that its last ``n_test`` rows form the test set.

    Args:
        data: 2-D array; rows are samples in time order.
        n_test: Number of trailing rows to hold out. ``0`` keeps every row in
            the training set. A value larger than ``len(data)`` puts every
            row in the test set.

    Returns:
        Tuple ``(train, test)`` of slices of ``data``.

    Raises:
        ValueError: If ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError("n_test must be non-negative, got %r" % (n_test,))
    # An explicit split index avoids the data[:-0] pitfall, where n_test == 0
    # would otherwise give an empty train set and put everything in test.
    split = max(len(data) - n_test, 0)
    return data[:split, :], data[split:, :]

In [5]:
# fit a random forest model and make a one-step prediction
def random_forest_forecast(train, testX, n_estimators=1000, random_state=None):
    """Fit a random forest classifier on ``train`` and predict one sample.

    Args:
        train: Sequence of equal-length rows (or a 2-D array). The last value
            of each row is the class label, everything before it the features.
        testX: 1-D feature vector for the sample to predict; it must have one
            value fewer than a training row.
        n_estimators: Number of trees in the forest.
        random_state: Seed forwarded to ``RandomForestClassifier``. Pass an
            int for reproducible predictions; ``None`` keeps the default
            unseeded behaviour.

    Returns:
        The predicted class label for ``testX``.
    """
    # transform list of rows into a 2-D array
    train = np.asarray(train)
    # split into input and output columns
    trainX, trainy = train[:, :-1], train[:, -1]
    # fit model
    model = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
    model.fit(trainX, trainy)
    # predict() expects a 2-D batch, so wrap the single sample in a list
    yhat = model.predict([testX])
    return yhat[0]

In [6]:
# walk-forward validation that feeds predicted labels back into the history
def walk_forward_validation(data, n_test, n_vars=8):
    """Forecast the last ``n_test`` rows one step at a time.

    The model is refitted on the full history at every step. Each new input
    window is built from the latest history row rather than from the test row
    itself, and the predicted label (not the true one) is written into the
    window before it joins the history. Later windows and later refits
    therefore see the model's own earlier predictions.

    Assumes each row of ``data`` is a sequence of consecutive ``n_vars``-wide
    time-step blocks, oldest first, with the target as the very last value
    (the layout ``series_to_supervised`` produces with ``n_out=1``).

    Args:
        data: 2-D array of framed windows.
        n_test: Number of trailing rows to forecast.
        n_vars: Number of values per time step in a row. The default of 8
            matches the eight columns this notebook loads.

    Returns:
        Tuple ``(expected, predictions)``: the true targets ``test[:, -1]``
        and the list of one-step predictions.
    """
    predictions = list()
    # split dataset
    train, test = train_test_split(data, n_test)
    # seed history with training dataset
    history = list(train)
    # step over each time-step in the test set
    for i in range(n_test):
        # slide the window: drop the oldest time step of the latest history
        # row and append the newest time step taken from test[i]
        new_test = np.append(history[-1][n_vars:], test[i, -n_vars:])
        # split the window into input and output columns
        testX, testy = new_test[:-1], new_test[-1]
        # fit model on history and make a prediction
        yhat = random_forest_forecast(history, testX)
        # store forecast in list of predictions
        predictions.append(yhat)
        # replace the true label with the prediction before extending history
        new_test[-1] = yhat
        history.append(new_test)
        # summarize progress
        print('%d >expected=%.1f, predicted=%.1f' % (i, testy, yhat))
    return test[:, -1], predictions

In [7]:
# read only the columns the model uses
dataset_path = '/content/drive/MyDrive/electricity_Dataset/original/14 Week.csv'
model_columns = ['day','period', 'nswprice', 'nswdemand', 'vicprice', 'vicdemand', 'transfer', 'class']
df = pd.read_csv(dataset_path, usecols=model_columns)

# encode the label numerically; any value other than UP/DOWN becomes NaN
label_codes = {"UP": 1, "DOWN": 2}
df["class"] = df["class"].map(label_codes)

# frame each row as the 48 preceding time steps followed by the current one
data = series_to_supervised(df, n_in=48)

In [None]:
# evaluate: forecast the last `test_size` rows with walk-forward validation
test_size = 48
y_true, y_pred = walk_forward_validation(data, n_test=test_size)

# plot expected vs predicted
for series, label in ((y_true, 'Expected'), (y_pred, 'Predicted')):
    pyplot.plot(series, label=label)
pyplot.legend()
pyplot.show()

# confusion matrix, then accuracy expressed as 1 - misclassification rate
print(confusion_matrix(y_true, y_pred))
mismatches = y_true != y_pred
print(1 - np.mean(mismatches))