In [51]:
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error
import utils
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
#Load combined temperature and demand dataset for all states
combined = pd.read_csv("../data/combined.csv")

In [4]:
#Read top 5
combined.head()

Unnamed: 0.1,Unnamed: 0,LOCATION,DATETIME,TEMPERATURE,TOTALDEMAND,REGIONID
0,0,Bankstown,2010-01-01 00:00:00,23.1,8038.0,NSW1
1,1,Bankstown,2010-01-01 00:30:00,22.9,7809.31,NSW1
2,2,Bankstown,2010-01-01 01:00:00,22.6,7483.69,NSW1
3,3,Bankstown,2010-01-01 01:30:00,22.5,7117.23,NSW1
4,4,Bankstown,2010-01-01 02:00:00,22.5,6812.03,NSW1


In [6]:
# Load forecast data
forecast_nsw = pd.read_csv("../data/H03-2021/forecastdemand_nsw.csv")
forecast_vic = pd.read_csv("../data/H06-2021/a/forecastdemand_vic.csv")
forecast_sa = pd.read_csv("../data/H06-2021/b/forecastdemand_sa.csv")
forecast_qld = pd.read_csv("../data/H06-2021/c/forecastdemand_qld.csv")

In [11]:
# Combine forecase data for all states
forecast = pd.concat([forecast_nsw,forecast_vic,forecast_sa,forecast_qld])

In [14]:
# Save forecast data as a checkpoint
forecast.to_csv('../../Datasets/combined_forecast.csv',header=combined.columns, index=False)

In [15]:
# Combine demand and forecast data
df = pd.merge(combined, forecast, on="DATETIME")

In [20]:
df.head()

Unnamed: 0.1,Unnamed: 0,LOCATION,DATETIME,TEMPERATURE,TOTALDEMAND,REGIONID_x,PREDISPATCHSEQNO,REGIONID_y,PERIODID,FORECASTDEMAND,LASTCHANGED
0,0,Bankstown,2010-01-01 00:00:00,23.1,8038.0,NSW1,2009123018,NSW1,71,7832.04,2009-12-30 12:31:49
1,0,Bankstown,2010-01-01 00:00:00,23.1,8038.0,NSW1,2009123019,NSW1,70,7832.04,2009-12-30 13:01:43
2,0,Bankstown,2010-01-01 00:00:00,23.1,8038.0,NSW1,2009123020,NSW1,69,7832.03,2009-12-30 13:31:36
3,0,Bankstown,2010-01-01 00:00:00,23.1,8038.0,NSW1,2009123021,NSW1,68,7832.03,2009-12-30 14:01:44
4,0,Bankstown,2010-01-01 00:00:00,23.1,8038.0,NSW1,2009123022,NSW1,67,7830.96,2009-12-30 14:31:35


In [32]:
# Drop first column
#df.drop(['Unnamed: 0'], axis=1, inplace=True)
df['DATETIME'] = df['DATETIME'].apply(lambda x: utils.calculate_month(x))

In [33]:
# Save combined demand, forecast data as a checkpoint
#df.to_csv('../../Datasets/combined_demand_forecast.csv',header=combined.columns, index=False)
df.head()

Unnamed: 0,LOCATION,DATETIME,TEMPERATURE,TOTALDEMAND,REGIONID_x,PREDISPATCHSEQNO,REGIONID_y,PERIODID,FORECASTDEMAND,LASTCHANGED
0,Bankstown,2010-01-01,23.1,8038.0,NSW1,2009123018,NSW1,71,7832.04,2009-12-30 12:31:49
1,Bankstown,2010-01-01,23.1,8038.0,NSW1,2009123019,NSW1,70,7832.04,2009-12-30 13:01:43
2,Bankstown,2010-01-01,23.1,8038.0,NSW1,2009123020,NSW1,69,7832.03,2009-12-30 13:31:36
3,Bankstown,2010-01-01,23.1,8038.0,NSW1,2009123021,NSW1,68,7832.03,2009-12-30 14:01:44
4,Bankstown,2010-01-01,23.1,8038.0,NSW1,2009123022,NSW1,67,7830.96,2009-12-30 14:31:35


In [38]:
#Baseline metrics all states, all years
y_pred_all = df['FORECASTDEMAND'].to_numpy()
y_true_all = df['TOTALDEMAND'].to_numpy()

mae_all = mean_squared_error(y_true_all, y_pred_all)

print(mae_all, ' Mean Squared Error, All')

12873370.785899756  Mean Squared Error, All


In [39]:
#Baseline Metrics NSW, all years
filter_state = 'NSW1'
rows = df[df.REGIONID_x == filter_state].index
cols = ['TOTALDEMAND','FORECASTDEMAND']

nsw = df.loc[rows,cols]

y_pred_all = nsw['FORECASTDEMAND'].to_numpy()
y_true_all = nsw['TOTALDEMAND'].to_numpy()

mae_all = mean_squared_error(y_true_all, y_pred_all)

print(mae_all, ' Mean Squared Error, NSW')

10247627.651199264  Mean Squared Error, NSW


In [40]:
#Baseline Metrics VIC, all years
filter_state = 'VIC1'
rows = df[df.REGIONID_x == filter_state].index
cols = ['TOTALDEMAND','FORECASTDEMAND']

nsw = df.loc[rows,cols]

y_pred_all = nsw['FORECASTDEMAND'].to_numpy()
y_true_all = nsw['TOTALDEMAND'].to_numpy()

mae_all = mean_squared_error(y_true_all, y_pred_all)

print(mae_all, ' Mean Squared Error, VIC')

6864981.864405128  Mean Squared Error, VIC


In [41]:
#Baseline Metrics QLD, all years
filter_state = 'QLD1'
rows = df[df.REGIONID_x == filter_state].index
cols = ['TOTALDEMAND','FORECASTDEMAND']

nsw = df.loc[rows,cols]

y_pred_all = nsw['FORECASTDEMAND'].to_numpy()
y_true_all = nsw['TOTALDEMAND'].to_numpy()

mae_all = mean_squared_error(y_true_all, y_pred_all)

print(mae_all, ' Mean Squared Error, QLD')

7329046.582546289  Mean Squared Error, QLD


In [42]:
#Baseline Metrics VIC, all years
filter_state = 'SA1'
rows = df[df.REGIONID_x == filter_state].index
cols = ['TOTALDEMAND','FORECASTDEMAND']

nsw = df.loc[rows,cols]

y_pred_all = nsw['FORECASTDEMAND'].to_numpy()
y_true_all = nsw['TOTALDEMAND'].to_numpy()

mae_all = mean_squared_error(y_true_all, y_pred_all)

print(mae_all, ' Mean Squared Error, SA')

28215613.172914725  Mean Squared Error, SA


In [43]:
#Baseline Metrics 2010
filter_date = '2010'
rows = df[df.DATETIME == filter_date].index
cols = ['TOTALDEMAND','FORECASTDEMAND']

nsw = df.loc[rows,cols]

y_pred_all = nsw['FORECASTDEMAND'].to_numpy()
y_true_all = nsw['TOTALDEMAND'].to_numpy()

mae_all = mean_squared_error(y_true_all, y_pred_all)

print(mae_all, ' Mean Squared Error, 2010')

3640406.803924413  Mean Squared Error, 2010


In [44]:
#Baseline Metrics 2019
filter_date = '2019'
rows = df[df.DATETIME == filter_date].index
cols = ['TOTALDEMAND','FORECASTDEMAND']

nsw = df.loc[rows,cols]

y_pred_all = nsw['FORECASTDEMAND'].to_numpy()
y_true_all = nsw['TOTALDEMAND'].to_numpy()

mae_all = mean_squared_error(y_true_all, y_pred_all)

print(mae_all, ' Mean Squared Error, 2019')

11115439.774866126  Mean Squared Error, 2019


In [46]:
#Baseline Metrics 2010
filter_date = '2010'
filter_state = 'NSW1'
rows = df[(df.DATETIME == filter_date) & (df.REGIONID_x == filter_state)].index
cols = ['TOTALDEMAND','FORECASTDEMAND']

nsw = df.loc[rows,cols]

y_pred_all = nsw['FORECASTDEMAND'].to_numpy()
y_true_all = nsw['TOTALDEMAND'].to_numpy()

mae_all = mean_squared_error(y_true_all, y_pred_all)

print(mae_all, ' Mean Squared Error, NSW 2010')

46191.3297690141  Mean Squared Error, NSW 2010


In [48]:
#Baseline Metrics 2019
filter_date = '2019'
filter_state = 'NSW1'
rows = df[(df.DATETIME == filter_date) & (df.REGIONID_x == filter_state)].index
cols = ['TOTALDEMAND','FORECASTDEMAND']

nsw = df.loc[rows,cols]

y_pred_all = nsw['FORECASTDEMAND'].to_numpy()
y_true_all = nsw['TOTALDEMAND'].to_numpy()

mae_all = mean_squared_error(y_true_all, y_pred_all)

print(mae_all, ' Mean Squared Error, NSW 2019')

12694611.693793308  Mean Squared Error, NSW 2019


In [56]:
#start_date = '2010'
#end_date = '2019'
#rows = df[(df.DATETIME > start_date) & (df.DATETIME < end_date)].index
#cols = ['DATETIME','TOTALDEMAND','FORECASTDEMAND','REGIONID_x']
#fig = plt.figure(figsize = (15,8))
#sns.lineplot(x='DATETIME',y='FORECASTDEMAND', data=df)
#sns.lineplot(x='DATETIME',y='TOTALDEMAND', data=df.loc[rows,cols], hue='REGIONID_x')
#plt.xlabel('Reporting Period')
#plt.ylabel('Demand Forecast')
#plt.title('Demand Forecast ALL ')