In [1]:
from tpot import TPOTRegressor
from sklearn.metrics import r2_score
from Utils.validate_model import ModelValidation
from datetime import datetime
import pandas as pd
import pickle

In [2]:
# Load the pickled model stored under Frameworks/TPOT for the chosen zone.
target_zone = 'zone_18'
# Second component of the model file name.
# NOTE(review): presumably the TPOT search budget used to build the model - confirm units.
runtime = 600
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load model files that were produced by this project.
# The context manager guarantees the file handle is closed after loading.
with open(f'Frameworks/TPOT/{target_zone}/{target_zone}_{runtime}', 'rb') as model_file:
    model = pickle.load(model_file)

In [3]:
# Show the loaded pipeline (its steps and their hyperparameters).
print(model)

Pipeline(steps=[('xgbregressor',
                 XGBRegressor(base_score=0.5, booster='gbtree',
                              colsample_bylevel=1, colsample_bynode=1,
                              colsample_bytree=1, gamma=0, gpu_id=-1,
                              importance_type='gain',
                              interaction_constraints='', learning_rate=0.1,
                              max_delta_step=0, max_depth=8,
                              min_child_weight=17, missing=nan,
                              monotone_constraints='()', n_estimators=100,
                              n_jobs=1, num_parallel_tree=1, random_state=42,
                              reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
                              subsample=0.2, tree_method='exact',
                              validate_parameters=1, verbosity=0))])


In [4]:
data_comlete = pd.read_csv(f'Dataset/{target_zone}.csv')
# gap is the window of hours whose values are missing for the target zone
# (2008-06-30 05:00 through 23:00, selected by label on the 'datetime' column).
gap = (
    data_comlete
    .set_index('datetime')
    .loc['2008-06-30 05:00:00':'2008-06-30 23:00:00']
    .reset_index(drop=True)
)
gap_features = gap.drop(columns='target')
gap_target = gap['target']

# Recursive one-step-ahead forecast across the gap: the prediction made for
# row x becomes the target_zone feature of row x+1.
n_gap_hours = len(gap_target)
gap_predictions = []
for x in range(n_gap_hours):
    # Single-row frame for the current hour (positional selection).
    row = gap_features.iloc[x:x + 1]
    # predict() returns a length-1 array; take the scalar explicitly.
    hour_pred = float(model.predict(row)[0])
    gap_predictions.append(hour_pred)
    # Feed the prediction forward. Use a single .at call instead of chained
    # indexing (which may write to a temporary copy), and skip the write
    # after the last row because there is no following hour inside the gap.
    if x + 1 < len(gap_features):
        gap_features.at[gap_features.index[x + 1], target_zone] = hour_pred

if not gap_predictions:
    raise ValueError('no rows found in the gap window; check the datetime bounds')

pred = pd.Series(gap_predictions, index=gap_features.index, name=target_zone)

# The final gap prediction seeds the first hour of the prediction week.
last_value = pred.iloc[-1]
print(last_value)

174217.515625


In [5]:
# Load the prediction week for the selected zone.
test = pd.read_csv(f'Dataset/Zones/{target_zone}test.csv')
test = test.drop(columns=['datetime'])
# Remove the target from the feature list.
features = test.drop(columns='target')
# Seed the first row's target_zone feature with the last prediction made for
# the missing (gap) hours. A single .at call replaces the chained assignment,
# which may write to a temporary copy instead of `features`.
features.at[features.index[0], target_zone] = last_value
target = test['target']
# Independent copy so that later writes to `pred` do not alter `test`.
pred = test[target_zone].copy()

In [6]:
# Recursive one-step-ahead forecast over the prediction week: the prediction
# made for row x becomes the target_zone feature of row x+1.
# (Per the original note, row 0 corresponds to 2008-07-01 00:00:00 - confirm.)
week_predictions = []
for x in range(len(target)):
    # Single-row frame for the current hour (positional selection).
    row = features.iloc[x:x + 1]
    # predict() returns a length-1 array; take the scalar explicitly.
    hour_pred = float(model.predict(row)[0])
    week_predictions.append(hour_pred)
    # Feed the prediction forward. Use a single .at call instead of chained
    # indexing (which may write to a temporary copy), and skip the write
    # after the last row because there is no following hour to update.
    if x + 1 < len(features):
        features.at[features.index[x + 1], target_zone] = hour_pred

pred = pd.Series(week_predictions, index=features.index, name=target_zone)




In [7]:
# Inspect the target_zone feature column after the forecasting loop:
# row 0 holds last_value and row x+1 holds the prediction made for row x.
print(features[target_zone])

0      174217.515625
1      157594.718750
2      143885.828125
3      139729.031250
4      142530.703125
           ...      
162    322675.125000
163    314445.843750
164    301240.875000
165    283287.062500
166    243338.671875
Name: zone_18, Length: 167, dtype: float64


In [8]:
# Inspect the predictions: pred[x] is the model output for feature row x.
print(pred)

0      157594.718750
1      143885.828125
2      139729.031250
3      142530.703125
4      156870.281250
           ...      
162    314445.843750
163    301240.875000
164    283287.062500
165    243338.671875
166    208693.187500
Name: zone_18, Length: 167, dtype: float64


In [9]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import max_error

# Report header identifying the model being evaluated.
print(f'Metrics for TPOT {target_zone}_{runtime}')

# Descriptive statistics of the observed load, evaluated lazily in print order.
load_stats = (
    ('Min Load:', target.min),
    ('Max Load:', target.max),
    ('Average Load:', target.mean),
)
for label, stat in load_stats:
    print(label, stat())

# Error metrics comparing the observed load with the recursive forecast.
error_metrics = (
    ('R2:', r2_score),
    ('MAE:', mean_absolute_error),
    ('MAPE:', mean_absolute_percentage_error),
    ('Max Error:', max_error),
)
for label, metric in error_metrics:
    print(label, metric(target, pred))

Metrics for TPOT zone_18_600
Min Load: 129107.0
Max Load: 352808.0
Average Load: 225798.89221556886
R2: 0.9477481554051927
MAE: 11855.25276010479
MAPE: 0.05430812238042374
Max Error: 39779.21875
