# Persistence Benchmark Model
This benchmark model uses the load from 24 hours prior to predict the load at any given day/time.

Author: Riley Denn

In [1]:
import pandas as pd
import numpy as np
import json
import os
import random
from sklearn.model_selection import train_test_split
import typing as t

In [2]:
# Load the project configuration. JSON is UTF-8 by specification, so the
# encoding is pinned explicitly instead of relying on the platform default.
with open('../../config.json', 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)

# Root folder of the shared drive, taken from the 'drive_path' config entry.
DRIVE_PATH = config['drive_path']
# Folders derived from the drive root.
EXTERNAL_DATA_PATH = DRIVE_PATH + "/[EXTERNAL] breakthrough_tech_ai_f24/data"
PROCESSED_DATA_PATH = DRIVE_PATH + "/processed_data"
# Folder holding one "<building_id>.csv" file per building (read by PersistenceModel).
PROCESSED_WEATHER_LOAD = PROCESSED_DATA_PATH + "/processed_weather_load_w_timestamp"

In [4]:
class PersistenceModel:
    """Persistence benchmark: forecast the next day with the previous day's load.

    For a row at time ``t`` the model predicts the load at ``t + i`` (for
    ``i = 0 .. STEPS_PER_DAY - 1``) to be the load observed at
    ``t - STEPS_PER_DAY + i``. There is nothing to learn, so ``fit`` is a
    no-op that exists only to mirror the usual estimator interface.
    """

    # Number of rows in the look-back / forecast window. The processed CSVs
    # displayed in this notebook are sampled every 15 minutes, so 96 rows
    # correspond to the "24 hours prior" the benchmark is defined on.
    STEPS_PER_DAY = 96

    # Ignore, work in progress
    # def get_past_account_for_workday(df, shift_by, current_timestamp, future_is_workday):

    #     shift_timestamp = current_timestamp - shift_by

    #     if shift_timestamp.isin(df.index):
    #         shift_is_workday = df[timestamp]['is_weekday'] && !df[timestamp]['is_holiday']
    #     else return nan

    #     while future_is_workday != shift_is_workday && shift_timestamp.isin(df.index):
    #         shift_timestamp -= 96
    #         shift_is_workday = df[timestamp]['is_weekday'] && !df[timestamp]['is_holiday']

    #     if shift_timestamp.isin(df.index):
    #         return shift_timestamp['load']
    #     else return nan

    def prep_features_and_label(self,
                                building_id,
                                parent_folder_path=PROCESSED_WEATHER_LOAD,
                                account_for_workday=False):
        """Read one building's CSV and build the lagged features and future labels.

        Args:
            building_id: Identifier used as the CSV file name
                (``<parent_folder_path>/<building_id>.csv``).
            parent_folder_path: Folder containing the per-building CSV files.
            account_for_workday: Reserved for a workday-aware variant that is
                still work in progress. When true, no lag columns are created
                and ``X`` is returned with the raw ``load`` column, which
                ``predict`` cannot consume.

        Returns:
            A tuple ``(X, Y)`` of DataFrames indexed by ``timestamp``.
            With ``account_for_workday=False``, ``X`` holds ``is_weekday``,
            ``is_holiday``, ``bldg_id`` and ``past_shift_96 .. past_shift_1``
            (the load 96 .. 1 rows earlier). ``Y`` holds ``load`` and
            ``future_shift_1 .. future_shift_95`` (the load 1 .. 95 rows
            later). Rows near the start / end of the file contain NaN where
            the shifted value does not exist.
        """
        df = pd.read_csv(f"{parent_folder_path}/{building_id}.csv")
        df = (df.rename(columns={'out.electricity.total.energy_consumption': 'load'})
                .set_index('timestamp'))
        features = ['load', 'is_weekday', 'is_holiday', 'bldg_id']
        label = ['load']

        X = df[features].copy()
        Y = df[label].copy()

        # The original condition was written `account_for_workday = False`,
        # which is an assignment and therefore a SyntaxError inside `if`.
        if not account_for_workday:
            # Process X / Features: oldest lag first so that the column order
            # matches the chronological order of the forecast horizon.
            for i in range(self.STEPS_PER_DAY, 0, -1):
                X[f'past_shift_{i}'] = X['load'].shift(i)
            # The current load is the label, not a feature.
            X = X.drop(columns='load')

        # Ignore, work in progress
        # else: # account_for_workday = True
        #     # Process X / Features
        #     for i in range (96, 0, -1):
        #         future_i = X['timestamp']+(96-i)
        #         #future_is_workday = future_i.weekday < 5 && !future_i.is_in(us_holidays) #fix this pseudocode then use instead of next line
        #         future_is_workday = X[future_i]['is_weekday'] && !X[future_i]['is_holiday']

        #         X[f'past_shift_{i}'] = get_past_account_for_workday(X, shift_by=i, current_timestamp, future_is_workday)

        # Process Y / Label: 'load' itself is horizon step 0, so only
        # STEPS_PER_DAY - 1 additional future columns are needed.
        for i in range(1, self.STEPS_PER_DAY):
            Y[f'future_shift_{i}'] = Y['load'].shift(-i)

        return X, Y

    def fit(self, X: pd.DataFrame, y: pd.DataFrame) -> t.Self:
        """Do nothing and return ``self``; the persistence model has no parameters."""
        return self

    def predict(self, X: pd.DataFrame) -> pd.DataFrame:
        """Turn the lag columns of ``X`` into a forecast laid out like ``Y``.

        ``past_shift_96`` becomes ``load`` (horizon step 0) and
        ``past_shift_{96 - i}`` becomes ``future_shift_{i}``, i.e. every
        horizon step is predicted with the value observed exactly
        ``STEPS_PER_DAY`` rows before it.

        Args:
            X: Features as returned by ``prep_features_and_label`` with
                ``account_for_workday=False``.

        Returns:
            A DataFrame with columns ``load, future_shift_1 .. future_shift_95``
            in that order, on the same index as ``X``.

        Raises:
            KeyError: If ``X`` lacks any of the required ``past_shift_*`` columns.
        """
        steps = self.STEPS_PER_DAY
        rename_dict = {f'past_shift_{steps}': 'load'}
        rename_dict.update(
            {f'past_shift_{steps - i}': f'future_shift_{i}' for i in range(1, steps)}
        )
        forecast = X.rename(columns=rename_dict)
        cols = ['load'] + [f"future_shift_{i}" for i in range(1, steps)]
        return forecast[cols]

In [5]:
def calculate_smape(actual, predicted):
    """Return the symmetric mean absolute percentage error (sMAPE), in percent.

    Every element contributes ``|predicted - actual|`` divided by the mean of
    ``|actual|`` and ``|predicted|``. Elements whose ratio is NaN are left out
    of the average because ``np.nanmean`` is used.
    """
    abs_error = np.abs(predicted - actual)
    mean_magnitude = (np.abs(actual) + np.abs(predicted)) / 2
    # Scale the NaN-ignoring mean ratio to a percentage.
    return np.nanmean(abs_error / mean_magnitude) * 100

In [3]:
# Metadata table for the building subset; its 'bldg_id' column supplies the
# building identifiers used by the cells below.
df_metadata = pd.read_csv(filepath_or_buffer=PROCESSED_DATA_PATH + "/subset20.csv")
# Preview the first five rows (the default row count of head()).
df_metadata.head(n=5)

Unnamed: 0,bldg_id,in.state,in.cluster_id,in.vintage,in.sqft,in.building_america_climate_zone_Cold,in.building_america_climate_zone_Hot-Dry,in.building_america_climate_zone_Hot-Humid,in.building_america_climate_zone_Marine,in.building_america_climate_zone_Mixed-Dry,...,in.comstock_building_type_SecondarySchool,in.comstock_building_type_SmallHotel,in.comstock_building_type_SmallOffice,in.comstock_building_type_Warehouse,in.comstock_building_type_group_Education,in.comstock_building_type_group_Food Service,in.comstock_building_type_group_Lodging,in.comstock_building_type_group_Mercantile,in.comstock_building_type_group_Office,in.comstock_building_type_group_Warehouse and Storage
0,105885,10,42.0,3,750000.0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
1,305819,40,74.0,2,150000.0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,1,0
2,305934,40,75.0,4,350000.0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,1,0
3,317044,40,75.0,3,350000.0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,1,0
4,32,1,53.0,6,37500.0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1


In [6]:
bldg_ids = df_metadata['bldg_id'].tolist()
# NOTE(review): the split is unseeded, so the reported averages vary slightly
# between runs; pass random_state=... to train_test_split to make them
# reproducible.
train_ids, test_ids = train_test_split(bldg_ids, test_size=0.2)

model = PersistenceModel()


def _building_smape(persistence_model, bldg_id):
    """Return the sMAPE of the persistence forecast for a single building."""
    X_bldg, Y_bldg = persistence_model.prep_features_and_label(bldg_id)
    persistence_model.fit(X_bldg, Y_bldg)
    prediction = persistence_model.predict(X_bldg)
    return calculate_smape(actual=Y_bldg, predicted=prediction)


# Both splits are scored the same way and kept only so the numbers can be
# compared with models that are evaluated on the same buildings.
train_smapes = [_building_smape(model, bldg) for bldg in train_ids]
avg_train_smape = np.mean(train_smapes)

test_smapes = [_building_smape(model, bldg) for bldg in test_ids]
avg_test_smape = np.mean(test_smapes)

In [7]:
# Report the average sMAPE of each split, one per line.
print(
    f'avg_train_smape: {avg_train_smape}',
    f'avg_test_smape: {avg_test_smape}',
    sep='\n',
)

avg_train_smape: 19.05406528839612
avg_test_smape: 18.952857886103015


#### Sample Building

In [31]:
# Run the benchmark end to end on one building (the second row of the metadata).
sample_model = PersistenceModel()
# Use the model created in this cell rather than `model` from an earlier cell,
# so the cell does not depend on that cell having been executed.
X_sample, Y_sample = sample_model.prep_features_and_label(df_metadata['bldg_id'][1])
sample_model.fit(X_sample, Y_sample)
sample_prediction = sample_model.predict(X_sample)
sample_smape = calculate_smape(actual=Y_sample, predicted=sample_prediction)
print(f'sample_smape: {sample_smape}')

sample_smape: 22.370761505708145


In [38]:
# Raw processed CSV of the same sample building, shown for reference.
sample_df = pd.read_csv(
    filepath_or_buffer=f"{PROCESSED_WEATHER_LOAD}/{df_metadata['bldg_id'][1]}.csv"
)
sample_df

Unnamed: 0,timestamp,out.electricity.total.energy_consumption,Dry Bulb Temperature [°C],Relative Humidity [%],heat_index,minute,hour,day,month,is_weekday,is_holiday,max_load_hourly,min_load_hourly,max_temp_hourly,min_temp_hourly,bldg_id
0,2018-01-01 01:00:00,112.398425,2.984615,75.216154,37.372308,0,1,1,1,1,1,195.978207,112.398425,2.984615,2.719231,305819
1,2018-01-01 01:15:00,195.978207,2.896154,75.650769,37.213077,15,1,1,1,1,1,195.978207,112.398425,2.984615,2.719231,305819
2,2018-01-01 01:30:00,193.125037,2.807692,76.085385,37.053846,30,1,1,1,1,1,195.978207,112.398425,2.984615,2.719231,305819
3,2018-01-01 01:45:00,190.015538,2.719231,76.520000,36.894615,45,1,1,1,1,1,195.978207,112.398425,2.984615,2.719231,305819
4,2018-01-01 02:00:00,186.667871,2.630769,76.954615,36.735385,0,2,1,1,1,1,194.222126,186.667871,2.630769,2.140385,305819
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35032,2018-12-31 23:00:00,104.538692,7.652941,97.786471,44.648788,0,23,31,12,1,0,104.538692,102.903932,7.652941,7.330882,305819
35033,2018-12-31 23:15:00,103.258920,7.545588,98.144118,44.453038,15,23,31,12,1,0,104.538692,102.903932,7.652941,7.330882,305819
35034,2018-12-31 23:30:00,102.903932,7.438235,98.501765,44.257289,30,23,31,12,1,0,104.538692,102.903932,7.652941,7.330882,305819
35035,2018-12-31 23:45:00,104.403005,7.330882,98.859412,44.061539,45,23,31,12,1,0,104.538692,102.903932,7.652941,7.330882,305819


In [35]:
# Show the feature rows that have no missing value in any column (for
# example, rows that have a value for every past_shift_* lag column).
X_sample.dropna(axis=0, how='any')

Unnamed: 0_level_0,is_weekday,is_holiday,bldg_id,past_shift_96,past_shift_95,past_shift_94,past_shift_93,past_shift_92,past_shift_91,past_shift_90,...,past_shift_10,past_shift_9,past_shift_8,past_shift_7,past_shift_6,past_shift_5,past_shift_4,past_shift_3,past_shift_2,past_shift_1
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-02 01:00:00,1,0,305819,112.398425,195.978207,193.125037,190.015538,186.667871,189.059093,191.640625,...,147.881608,150.770919,154.243501,159.943152,167.211828,175.849414,184.638626,188.527070,192.378445,195.459329
2018-01-02 01:15:00,1,0,305819,195.978207,193.125037,190.015538,186.667871,189.059093,191.640625,194.222126,...,150.770919,154.243501,159.943152,167.211828,175.849414,184.638626,188.527070,192.378445,195.459329,198.388610
2018-01-02 01:30:00,1,0,305819,193.125037,190.015538,186.667871,189.059093,191.640625,194.222126,196.788411,...,154.243501,159.943152,167.211828,175.849414,184.638626,188.527070,192.378445,195.459329,198.388610,200.426063
2018-01-02 01:45:00,1,0,305819,190.015538,186.667871,189.059093,191.640625,194.222126,196.788411,202.493276,...,159.943152,167.211828,175.849414,184.638626,188.527070,192.378445,195.459329,198.388610,200.426063,202.288619
2018-01-02 02:00:00,1,0,305819,186.667871,189.059093,191.640625,194.222126,196.788411,202.493276,208.351840,...,167.211828,175.849414,184.638626,188.527070,192.378445,195.459329,198.388610,200.426063,202.288619,204.035637
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-31 23:00:00,1,0,305819,153.951208,152.513778,152.093349,151.267350,150.682006,151.799125,152.127912,...,319.584109,321.602794,324.431185,113.727325,111.387913,109.487799,107.360322,106.615102,105.826725,104.707297
2018-12-31 23:15:00,1,0,305819,152.513778,152.093349,151.267350,150.682006,151.799125,152.127912,153.789723,...,321.602794,324.431185,113.727325,111.387913,109.487799,107.360322,106.615102,105.826725,104.707297,104.538692
2018-12-31 23:30:00,1,0,305819,152.093349,151.267350,150.682006,151.799125,152.127912,153.789723,154.953531,...,324.431185,113.727325,111.387913,109.487799,107.360322,106.615102,105.826725,104.707297,104.538692,103.258920
2018-12-31 23:45:00,1,0,305819,151.267350,150.682006,151.799125,152.127912,153.789723,154.953531,168.792862,...,113.727325,111.387913,109.487799,107.360322,106.615102,105.826725,104.707297,104.538692,103.258920,102.903932


In [36]:
# Show the label rows that have no missing value in any column (for
# example, rows that have a value for every future_shift_* column).
Y_sample.dropna(axis=0, how='any')

Unnamed: 0_level_0,load,future_shift_1,future_shift_2,future_shift_3,future_shift_4,future_shift_5,future_shift_6,future_shift_7,future_shift_8,future_shift_9,...,future_shift_86,future_shift_87,future_shift_88,future_shift_89,future_shift_90,future_shift_91,future_shift_92,future_shift_93,future_shift_94,future_shift_95
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-01 01:00:00,112.398425,195.978207,193.125037,190.015538,186.667871,189.059093,191.640625,194.222126,196.788411,202.493276,...,147.881608,150.770919,154.243501,159.943152,167.211828,175.849414,184.638626,188.527070,192.378445,195.459329
2018-01-01 01:15:00,195.978207,193.125037,190.015538,186.667871,189.059093,191.640625,194.222126,196.788411,202.493276,208.351840,...,150.770919,154.243501,159.943152,167.211828,175.849414,184.638626,188.527070,192.378445,195.459329,198.388610
2018-01-01 01:30:00,193.125037,190.015538,186.667871,189.059093,191.640625,194.222126,196.788411,202.493276,208.351840,214.132881,...,154.243501,159.943152,167.211828,175.849414,184.638626,188.527070,192.378445,195.459329,198.388610,200.426063
2018-01-01 01:45:00,190.015538,186.667871,189.059093,191.640625,194.222126,196.788411,202.493276,208.351840,214.132881,220.078520,...,159.943152,167.211828,175.849414,184.638626,188.527070,192.378445,195.459329,198.388610,200.426063,202.288619
2018-01-01 02:00:00,186.667871,189.059093,191.640625,194.222126,196.788411,202.493276,208.351840,214.132881,220.078520,220.199398,...,167.211828,175.849414,184.638626,188.527070,192.378445,195.459329,198.388610,200.426063,202.288619,204.035637
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-30 23:15:00,152.513778,152.093349,151.267350,150.682006,151.799125,152.127912,153.789723,154.953531,168.792862,168.439978,...,321.602794,324.431185,113.727325,111.387913,109.487799,107.360322,106.615102,105.826725,104.707297,104.538692
2018-12-30 23:30:00,152.093349,151.267350,150.682006,151.799125,152.127912,153.789723,154.953531,168.792862,168.439978,168.417792,...,324.431185,113.727325,111.387913,109.487799,107.360322,106.615102,105.826725,104.707297,104.538692,103.258920
2018-12-30 23:45:00,151.267350,150.682006,151.799125,152.127912,153.789723,154.953531,168.792862,168.439978,168.417792,168.390861,...,113.727325,111.387913,109.487799,107.360322,106.615102,105.826725,104.707297,104.538692,103.258920,102.903932
2018-12-31 00:00:00,150.682006,151.799125,152.127912,153.789723,154.953531,168.792862,168.439978,168.417792,168.390861,168.012008,...,111.387913,109.487799,107.360322,106.615102,105.826725,104.707297,104.538692,103.258920,102.903932,104.403005


In [37]:
# Show the forecast rows that have no missing value in any column.
sample_prediction.dropna(axis=0, how='any')

Unnamed: 0_level_0,load,future_shift_1,future_shift_2,future_shift_3,future_shift_4,future_shift_5,future_shift_6,future_shift_7,future_shift_8,future_shift_9,...,future_shift_86,future_shift_87,future_shift_88,future_shift_89,future_shift_90,future_shift_91,future_shift_92,future_shift_93,future_shift_94,future_shift_95
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-02 01:00:00,112.398425,195.978207,193.125037,190.015538,186.667871,189.059093,191.640625,194.222126,196.788411,202.493276,...,147.881608,150.770919,154.243501,159.943152,167.211828,175.849414,184.638626,188.527070,192.378445,195.459329
2018-01-02 01:15:00,195.978207,193.125037,190.015538,186.667871,189.059093,191.640625,194.222126,196.788411,202.493276,208.351840,...,150.770919,154.243501,159.943152,167.211828,175.849414,184.638626,188.527070,192.378445,195.459329,198.388610
2018-01-02 01:30:00,193.125037,190.015538,186.667871,189.059093,191.640625,194.222126,196.788411,202.493276,208.351840,214.132881,...,154.243501,159.943152,167.211828,175.849414,184.638626,188.527070,192.378445,195.459329,198.388610,200.426063
2018-01-02 01:45:00,190.015538,186.667871,189.059093,191.640625,194.222126,196.788411,202.493276,208.351840,214.132881,220.078520,...,159.943152,167.211828,175.849414,184.638626,188.527070,192.378445,195.459329,198.388610,200.426063,202.288619
2018-01-02 02:00:00,186.667871,189.059093,191.640625,194.222126,196.788411,202.493276,208.351840,214.132881,220.078520,220.199398,...,167.211828,175.849414,184.638626,188.527070,192.378445,195.459329,198.388610,200.426063,202.288619,204.035637
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-31 23:00:00,153.951208,152.513778,152.093349,151.267350,150.682006,151.799125,152.127912,153.789723,154.953531,168.792862,...,319.584109,321.602794,324.431185,113.727325,111.387913,109.487799,107.360322,106.615102,105.826725,104.707297
2018-12-31 23:15:00,152.513778,152.093349,151.267350,150.682006,151.799125,152.127912,153.789723,154.953531,168.792862,168.439978,...,321.602794,324.431185,113.727325,111.387913,109.487799,107.360322,106.615102,105.826725,104.707297,104.538692
2018-12-31 23:30:00,152.093349,151.267350,150.682006,151.799125,152.127912,153.789723,154.953531,168.792862,168.439978,168.417792,...,324.431185,113.727325,111.387913,109.487799,107.360322,106.615102,105.826725,104.707297,104.538692,103.258920
2018-12-31 23:45:00,151.267350,150.682006,151.799125,152.127912,153.789723,154.953531,168.792862,168.439978,168.417792,168.390861,...,113.727325,111.387913,109.487799,107.360322,106.615102,105.826725,104.707297,104.538692,103.258920,102.903932
