In [1]:
from sklearn.metrics import r2_score
from sklearn.metrics import median_absolute_error
from sklearn.metrics import accuracy_score
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import pandas as pd
import autosklearn.regression
import pickle

In [2]:
# Load the previously trained Auto-sklearn model for one zone.
# target_zone and runtime select the pickle file; runtime matches the
# time_left_for_this_task value shown when the model is printed.
target_zone='zone_18'
runtime = 14400
# SECURITY NOTE: pickle.load executes arbitrary code contained in the file;
# only load model files that you produced yourself or otherwise trust.
# The context manager makes sure the file handle is closed after loading.
with open(f'Frameworks/Auto-sklearn/{target_zone}/{target_zone}_{runtime}', 'rb') as model_file:
    model = pickle.load(model_file)

In [3]:
# Print the unpickled estimator so its constructor settings appear in the output.
print(model)

AutoSklearnRegressor(memory_limit=2000000, metric=mean_absolute_error, n_jobs=1,
                     per_run_time_limit=1440,
                     resampling_strategy_arguments={'train_size': 0.8},
                     time_left_for_this_task=14400)


In [4]:
# Forecast the hours that lie between the training data and the test file,
# so the test-period forecast can be seeded with a predicted value.
# (The spelling of `data_comlete` is kept in case other cells refer to it.)
data_comlete = pd.read_csv(f'Dataset/{target_zone}.csv')
# gap holds the rows of the missing hours; a label slice includes both ends.
# reset_index returns a new frame, avoiding an in-place change on a slice.
gap = (
    data_comlete.set_index('datetime')
    .loc['2008-06-30 05:00:00': '2008-06-30 23:00:00']
    .reset_index(drop=True)
)
gap_features = gap.drop('target', axis=1)
gap_target = gap['target']
# Independent copy: storing forecasts must not write through into `gap`.
pred = gap[target_zone].copy()

# Recursive one-step-ahead forecast: the prediction for hour x becomes the
# target_zone feature of hour x+1.
n_gap = len(gap_target)
for x in range(n_gap):
    # one-row DataFrame with the features of hour x (first row is 2008-06-30 05:00:00)
    row = gap_features.iloc[[x]]
    # predict() yields a single-element array for one row; store it as a scalar
    pred.iloc[x] = model.predict(row).item()
    # Feed the forecast forward. Single-step .loc assignment is used because
    # chained indexing (df[col][i] = v) may write to a temporary copy.
    # The last hour has no following row inside the gap, so nothing is written.
    if x + 1 < n_gap:
        gap_features.loc[x + 1, target_zone] = pred.iloc[x]

# Forecast for the final gap hour, taken by position instead of a hard-coded 18
last_value = pred.iloc[-1]
print(last_value)

170220.98608398438


In [5]:
# Load the prediction (test) period for target_zone
test = pd.read_csv(f'Dataset/Zones/{target_zone}test.csv')
test = test.drop(['datetime'], axis=1)
# remove target from feature list
features = test.drop('target', axis=1)
# Seed the first row's target_zone feature with the last forecast of the gap.
# A single .loc assignment is used because chained indexing
# (features[col][0] = v) may write to a temporary copy and leave `features` unchanged.
features.loc[0, target_zone] = last_value
target = test['target']
# Independent copy, so that storing forecasts in `pred` does not modify `test`.
pred = test[target_zone].copy()

In [6]:
# Recursive one-step-ahead forecast over the test period: the prediction for
# hour x becomes the target_zone feature of hour x+1.
n_hours = len(target)
for x in range(n_hours):
    # one-row DataFrame with the features of hour x
    row = features.iloc[[x]]
    # predict() yields a single-element array for one row; store it as a scalar
    pred.iloc[x] = model.predict(row).item()
    # Feed the forecast forward with a single .loc assignment; chained indexing
    # (features[col][i] = v) may write to a temporary copy.
    # The final hour has no following row, so nothing is written past the end.
    if x + 1 < n_hours:
        features.loc[x + 1, target_zone] = pred.iloc[x]

In [7]:
# Show the target_zone feature column after the forecast loop: row 0 holds the
# seed value from the gap forecast, and each later row holds the prediction
# made for the previous row.
print(features[target_zone])

0      170220.986084
1      153158.818115
2      141399.466309
3      136657.480225
4      137792.804443
           ...      
162    312621.246582
163    304671.219238
164    289303.393066
165    274403.770996
166    236626.010254
Name: zone_18, Length: 167, dtype: float64


In [8]:
# Show the forecast stored for each row of the test period.
print(pred)

0      153158.818115
1      141399.466309
2      136657.480225
3      137792.804443
4      150556.580566
           ...      
162    304671.219238
163    289303.393066
164    274403.770996
165    236626.010254
166    203989.298584
Name: zone_18, Length: 167, dtype: float64


In [9]:
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    max_error,
)

# Report the range of the actual load and the error metrics of the forecast.
print(f'Metrics for Auto-Sklearn {target_zone}_{runtime}')

# (label, zero-argument callable) pairs; every value is computed right before
# it is printed, in the order listed here.
summary_rows = [
    ('Min Load:', target.min),
    ('Max Load:', target.max),
    ('Average Load:', target.mean),
    ('R2:', lambda: r2_score(target, pred)),
    ('MAE:', lambda: mean_absolute_error(target, pred)),
    ('MAPE:', lambda: mean_absolute_percentage_error(target, pred)),
    ('Max Error:', lambda: max_error(target, pred)),
]
for label, compute in summary_rows:
    print(label, compute())

Metrics for Auto-Sklearn zone_18_14400
Min Load: 129107.0
Max Load: 352808.0
Average Load: 225798.89221556886
R2: 0.9582809919838811
MAE: 10408.484576744948
MAPE: 0.0471633122265
Max Error: 36490.72607421875
