In [1]:
import sys
sys.path.append("../../")
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, metrics

from autosklearn.regression import AutoSklearnRegressor

from Class_Solar import Solar

# ---- Experiment constants ----
# NOTE(review): NUMBER_ATTRIBUTES, BEGIN_AT, _VALUETOTEST and PleaseShowMe are
# not referenced by any cell visible in this notebook.
NUMBER_ATTRIBUTES = 15
BEGIN_AT = 0
_VALUETOTEST = 8
_NUMBERZONES = 3   # number of zones iterated over by runModel()
_ZONETOTEST = 2    # ZoneID - 1
PleaseShowMe = False

# Compact, non-scientific array printing for the rest of the notebook.
np.set_printoptions(suppress=True, precision=5)

# ---- Data sets ----
# Training features come from the baseline file; training targets for the
# 24-hour-ahead task come from the "24ahead" file (see makeModel()).
# NOTE(review): skip_f=24 presumably drops 24 rows so the shifted files line
# up with the baseline ones -- confirm against Class_Solar.
s_train = Solar("../../solar_training.csv", skip_f=24)
print("Shape of training data (baseline):", s_train.data.shape)

s_train_24ahead = Solar("../../solar_training_24ahead.csv")
print("Shape of training data (24 hours ahead):", s_train_24ahead.data.shape)

# Test targets come from the baseline file; test features for the
# 24-hour-ahead task come from the "24behind" file (see runModel()).
s_test = Solar("../../solar_test.csv")
print("Shape of test data (baseline):", s_test.data.shape)

s_test_24behind = Solar("../../solar_test_24behind.csv", skip_f=24)
print("Shape of test data (24 hours behind):", s_test_24behind.data.shape)

# ---- Shared result holders (read and written as globals by later cells) ----
RMSE_Scores = [0] * 3   # one slot per zone
MAE_Scores = [0] * 3    # one slot per zone

# Estimators evaluated by the driver loop at the end of the notebook.
ModelsToTest = [
  AutoSklearnRegressor(
    time_left_for_this_task=30,
    n_jobs=-1,
    memory_limit=4096,
  ),
]
OutputResults = {}

Shape of training data (baseline): (10920, 14)
Shape of training data (24 hours ahead): (10920, 14)
Shape of test data (baseline): (48168, 14)
Shape of test data (24 hours behind): (48168, 14)


In [2]:
def truncate(num, digits):
  """Return ``num`` as a decimal string cut off (not rounded) after ``digits`` decimals.

  Args:
    num: Any value accepted by ``float()``.
    digits: Maximum number of decimal places to keep. If the number has fewer
      decimals than this, all of them are kept (no zero padding).

  Returns:
    A string of the form ``"<whole>.<fraction>"``. Non-finite inputs are
    returned as ``"nan"``, ``"inf"`` or ``"-inf"``.
  """
  import math
  from decimal import Decimal

  value = float(num)
  if not math.isfinite(value):
    # 'nan' / 'inf' have no fractional part to truncate.
    return str(value)

  # repr() switches to scientific notation for very small or very large
  # magnitudes (e.g. '1e-05'), which has no '.' to split on. Going through
  # Decimal expands the same shortest repr into plain positional digits.
  whole, _, frac = format(Decimal(repr(value)), 'f').partition('.')
  if not frac:
    # Large magnitudes expand to a bare integer; keep the "x.0" float look.
    frac = '0'
  digits = min(len(frac), digits)
  return whole + '.' + frac[:digits]

In [3]:
def makeModel(m):
  """Fit ``m`` for the 24-hour-ahead task and return that same object.

  Features are ``s_train.data`` and targets are ``s_train_24ahead.power``
  (both module-level). The estimator is fitted in place; no copy is made.
  """
  # For current-time predictions, the targets would be s_train.power instead.
  m.fit(s_train.data, s_train_24ahead.power)
  return m

In [4]:
from copy import deepcopy

def runModel():
  """Score the fitted global ``regr`` on every zone of the test set.

  Reads the module-level ``regr``, ``s_test``, ``s_test_24behind`` and
  ``_NUMBERZONES``. For each zone it writes the RMSE and MAE into
  ``RMSE_Scores[z]`` / ``MAE_Scores[z]`` and into
  ``OutputResults[str(regr)][z]``.

  Side effects:
    When a zone's feature and target arrays differ in length, one of them is
    trimmed by 24 entries and stored back on the ``Solar`` object, so the
    trimming persists for later calls.
    If the lengths still disagree, scikit-learn raises ``ValueError``; a
    warning is emitted and the zone is left without "RMSE"/"MAE" entries
    (its slots in the global score lists keep their previous values).
  """
  import warnings

  name = str(regr)
  OutputResults[name] = {}
  for z in range(_NUMBERZONES):
    zone_results = OutputResults[name][z] = {}

    # Since there are multiple zones, we may need to shave off 24 hours.
    n_features = len(s_test_24behind.zonedata[z])
    n_targets = len(s_test.zonepower[z])
    if n_features > n_targets:
      s_test_24behind.zonedata[z] = s_test_24behind.zonedata[z][:-24]
    elif n_features < n_targets:
      s_test.zonepower[z] = s_test.zonepower[z][24:]

    # 24-hour-ahead predictions use the "24 behind" features; for current-time
    # predictions this would be s_test.zonedata[z] instead.
    y_pred_test = regr.predict(s_test_24behind.zonedata[z])
    y_true = s_test.zonepower[z]

    try:
      # NOTE(review): `squared=False` is tied to the scikit-learn version in
      # use; newer releases may require a different RMSE call -- confirm.
      rmse = metrics.mean_squared_error(y_true, y_pred_test, squared=False)
      mae = metrics.mean_absolute_error(y_true, y_pred_test)
    except ValueError as exc:
      # Typically a remaining length mismatch. Keep going with the other
      # zones, but do not hide the problem.
      warnings.warn(
        f"Zone {z}: could not score predictions ({exc}); "
        "no RMSE/MAE recorded for this zone."
      )
      continue

    RMSE_Scores[z] = rmse
    MAE_Scores[z] = mae
    zone_results["RMSE"] = rmse
    zone_results["MAE"] = mae

    # Plot hooks for zone _ZONETOTEST (plotPredictVsActual / plotCurve) were
    # disabled at this point in the original notebook.

In [5]:
import pandas as pd

def printScores(verbose=False):
  """Store the cross-zone average scores for the global ``regr``.

  Reads the module-level ``RMSE_Scores`` / ``MAE_Scores`` lists and writes
  their means to ``OutputResults[str(regr)]["RMSE"]`` and ``["MAE"]``.
  ``runModel()`` must have created ``OutputResults[str(regr)]`` beforehand,
  otherwise a ``KeyError`` is raised.

  If the estimator exposes ``show_models`` and/or ``sprint_statistics``, their
  string output is stored under "Final Ensemble" and "Sprint". Estimators
  without these methods are skipped; a failure inside them produces a warning
  instead of aborting the run.

  Args:
    verbose: When True, also print a per-zone table of the scores, truncated
      to 6 decimals with ``truncate``. Defaults to False (nothing is printed).
  """
  import warnings

  def _mean(values):
    # Guard against an empty score list rather than dividing by zero.
    return sum(values) / len(values) if values else 0.0

  name = str(regr)
  RMSE_avg = _mean(RMSE_Scores)
  MAE_avg = _mean(MAE_Scores)

  if verbose:
    zone_header = "\t" + "\t\t".join(
      f"Zone {k + 1}" for k in range(len(RMSE_Scores))
    ) + "\t\tOverall"
    RMSE_out = "".join("\t" + truncate(s, 6) for s in RMSE_Scores)
    RMSE_out += "\t" + truncate(RMSE_avg, 6)
    MAE_out = "".join("\t" + truncate(s, 6) for s in MAE_Scores)
    MAE_out += "\t" + truncate(MAE_avg, 6)
    print("\n\t\t###### Scoring Metrics ######")
    print(zone_header)
    print(f"RMSE{RMSE_out}")
    print(f"MAE{MAE_out}")

  OutputResults[name]["RMSE"] = RMSE_avg
  OutputResults[name]["MAE"] = MAE_avg

  # Optional introspection: only available on estimators that provide it.
  optional_reports = (
    ("Final Ensemble", "show_models"),
    ("Sprint", "sprint_statistics"),
  )
  for key, method_name in optional_reports:
    report = getattr(regr, method_name, None)
    if report is None:
      continue
    try:
      OutputResults[name][key] = str(report())
    except Exception as exc:
      # Best effort: the scores above are already stored.
      warnings.warn(f"{method_name}() failed for {name.split('(')[0]}: {exc}")

In [6]:
import json
from os import path

def writeMe(results=None,
            json_path="Traditional_SameNetwork.json",
            csv_path="Traditional_SameNetwork_Results.csv"):
  """Append the collected results to a JSON-lines file and a CSV file.

  Args:
    results: Mapping ``{model_name: {zone_or_key: value}}``. Defaults to the
      module-level ``OutputResults``.
    json_path: File that receives one JSON document per call, one per line.
    csv_path: File that receives one ``Model,Zone,MAE`` row per scored zone.

  Only integer keys are treated as zones. Zones that have no "MAE" entry are
  skipped. The CSV header is written only when the file is new or empty.
  """
  if results is None:
    results = OutputResults

  with open(json_path, "a") as f:
    # One document per line so repeated appends remain parseable.
    f.write(json.dumps(results) + "\n")

  # Must be decided BEFORE opening: append mode creates the file, after which
  # an existence check can never report "missing".
  needs_header = not path.exists(csv_path) or path.getsize(csv_path) == 0

  with open(csv_path, "a") as f2:
    if needs_header:
      f2.write("Model,Zone,MAE\n")
    for model, per_model in results.items():
      mdl = str(model).split('(')[0]
      for zone, scores in per_model.items():
        # Non-integer keys hold per-model aggregates, not zones.
        if not isinstance(zone, int) or "MAE" not in scores:
          continue
        f2.write(f"{mdl},{zone},{scores['MAE']}\n")

In [7]:
# Number of times the whole model list is refitted and rescored.
REPEATS = 2
OutputResults.clear()

for i in range(REPEATS):
  for model in ModelsToTest:
    # Reset per model (not just per repeat): runModel() leaves a zone's slot
    # untouched when scoring fails, so a shared list would carry the previous
    # model's score into this model's averages.
    MAE_Scores = [0] * _NUMBERZONES
    RMSE_Scores = [0] * _NUMBERZONES

    print(f"-----{str(model).split('(')[0]}-----")
    # runModel() and printScores() read the fitted estimator through the
    # global name `regr`, so this assignment must stay at module level.
    regr = makeModel(model)
    runModel()
    printScores()
  # Persist everything collected in this repeat.
  writeMe()
  print()

-----AutoSklearnRegressor-----


  self.metafeatures = self.metafeatures.append(metafeatures)
  self.algorithm_runs[metric].append(runs)



-----AutoSklearnRegressor-----


  self.metafeatures = self.metafeatures.append(metafeatures)
  self.algorithm_runs[metric].append(runs)



