# Short Term Load Forecasting using XGBoost

This notebook demonstrates the use of XGBoost for short term load forecasting. It follows the steps in `N. Javaid, M. Nauman Javid Ghuman, Z. Ali Khan, R. Abid Abbasi, and S. Ur Rehman, “Short Term Load Forecasting Using XGBoost,” doi: 10.1007/978-3-030-15035-8_108`.

In [1]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import xgboost as xgb

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

plt.rcParams['figure.figsize'] = (12.8, 7.2)


In [4]:
df = pd.read_csv('../data/BANES_cleaned_final.csv', parse_dates=['time'])
# df = df.groupby(['time', 'location']).mean().reset_index()


In [5]:
df = df[(df['time'] >= '2014-01-01') & (df['time'] <= '2015-01-01')]

df.head()

Unnamed: 0,time,location,postcode,energy
2936447,2014-01-01 00:00:00+00:00,Junior School Electricity - Kitchen,BA2 1LG,0.244
2936495,2014-01-01 00:00:00+00:00,Paulton Junior School Electricity Supply,BS39 7QY,2.25
2936543,2014-01-01 00:00:00+00:00,Infants School - Kitchen,BA2 1LG,0.119
2936591,2014-01-01 00:00:00+00:00,Westfield Childrens Centre (Now Schools Respon...,BA3 3XX,0.138
2936639,2014-01-01 00:00:00+00:00,Keynsham Childrens Centre - Castle School Annexe,BS31 2TS,0.547


In [6]:
df = df.groupby(['time', 'location']).mean().reset_index()
df['location'].nunique()

47

In [13]:
days_range = pd.date_range(df['time'].min(), df['time'].max(), freq='30min')
for location in df.location.unique():
    if df[df['location'] == location]['time'].nunique() == len(days_range):
        print(location)
    # print(location, df[df['location'] == location]['time'].nunique() == days_range.shape[0])
# days_range.shape, df['time'].nunique()

In [8]:
df.tail()

Unnamed: 0,time,location,energy
511086,2015-01-01 00:00:00+00:00,Stanton Drew Primary School (P272 HH),1.176
511087,2015-01-01 00:00:00+00:00,Swallow Street Stores & Workshop Electri,0.163
511088,2015-01-01 00:00:00+00:00,The Hollies Offices MSN (P272 HH),5.33
511089,2015-01-01 00:00:00+00:00,The Hollies Offices MSN Car Park Barrier,0.365
511090,2015-01-01 00:00:00+00:00,Twerton Infant School Electricity Supply,0.0


In [9]:
df = df.sort_values(by=["location", "time"])
for i in range(1, 48 * 7 + 1):
    df[str(i)] = df['energy'].shift(i)


In [10]:
# for each location remove the first 48 * 7 hours
locations = df['location'].unique()

# drop rows, rows_to_delete contains their index
df.drop([i for j in tqdm(range(len(locations))) for i in df[df['location'] == locations[j]].index[:48 * 7 + 1]], inplace=True)
df.head()


100%|██████████| 47/47 [00:10<00:00,  4.47it/s]


Unnamed: 0,time,location,energy,1,2,3,4,5,6,7,...,327,328,329,330,331,332,333,334,335,336
15601,2014-01-12 08:30:00+00:00,## OLD Paulton Library Electricity Supply 1,0.019,0.019,0.019,0.019,0.019,0.018,0.019,0.019,...,0.019,0.019,0.019,0.019,0.019,0.019,0.019,0.018,0.018,0.018
15630,2014-01-12 09:00:00+00:00,## OLD Paulton Library Electricity Supply 1,0.019,0.019,0.019,0.019,0.019,0.019,0.018,0.019,...,0.019,0.019,0.019,0.019,0.019,0.019,0.019,0.019,0.018,0.018
15658,2014-01-12 09:30:00+00:00,## OLD Paulton Library Electricity Supply 1,0.018,0.019,0.019,0.019,0.019,0.019,0.019,0.018,...,0.019,0.019,0.019,0.019,0.019,0.019,0.019,0.019,0.019,0.018
15686,2014-01-12 10:00:00+00:00,## OLD Paulton Library Electricity Supply 1,0.018,0.018,0.019,0.019,0.019,0.019,0.019,0.019,...,0.019,0.019,0.019,0.019,0.019,0.019,0.019,0.019,0.019,0.019
15738,2014-01-12 11:00:00+00:00,## OLD Paulton Library Electricity Supply 1,0.018,0.018,0.018,0.019,0.019,0.019,0.019,0.019,...,0.019,0.019,0.019,0.019,0.019,0.019,0.019,0.019,0.019,0.019


In [11]:
df['time'].min(), df['time'].max()

(Timestamp('2014-01-09 02:00:00+0000', tz='UTC'),
 Timestamp('2015-01-01 00:00:00+0000', tz='UTC'))

## Feature Selection

In [None]:
model = xgb.XGBRegressor(n_estimators=1000, learning_rate=0.05, max_depth=5)
model.fit(df.drop(['energy', 'time', 'location'], axis=1), df['energy'], verbose=True)

XGBoostError: bad allocation