In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the NSW1 price-and-demand export for January 2023.
# Data source: aemo.com.au/energy-systems/electricity/national-electricity-market-nem/data-nem
csv_path = '../csv/PRICE_AND_DEMAND_202301_NSW1.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,REGION,SETTLEMENTDATE,TOTALDEMAND,RRP,PERIODTYPE
0,NSW1,2023/01/01 00:05:00,6906.63,140.4,TRADE
1,NSW1,2023/01/01 00:10:00,6854.28,133.96,TRADE
2,NSW1,2023/01/01 00:15:00,6856.32,120.0,TRADE
3,NSW1,2023/01/01 00:20:00,6796.27,118.99,TRADE
4,NSW1,2023/01/01 00:25:00,6805.55,118.99,TRADE


In [4]:
# Inspect the dtype pandas inferred for each column right after loading.
df.dtypes

REGION             object
SETTLEMENTDATE     object
TOTALDEMAND       float64
RRP               float64
PERIODTYPE         object
dtype: object

In [5]:
# SETTLEMENTDATE strings are year-first ("2023/01/01 00:05:00"), so a
# dayfirst=True hint does not describe the data and leaves the day/month
# ordering up to the parser's heuristics. An explicit format is unambiguous
# and raises if a row does not match it.
df['Date_Time'] = pd.to_datetime(df['SETTLEMENTDATE'], format='%Y/%m/%d %H:%M:%S')

# Split the timestamp into separate integer calendar / time-of-day columns.
df['Day'] = df['Date_Time'].dt.day
df['Month'] = df['Date_Time'].dt.month
df['Year'] = df['Date_Time'].dt.year
df['Hour'] = df['Date_Time'].dt.hour
df['Minute'] = df['Date_Time'].dt.minute
df

Unnamed: 0,REGION,SETTLEMENTDATE,TOTALDEMAND,RRP,PERIODTYPE,Date_Time,Day,Month,Year,Hour,Minute
0,NSW1,2023/01/01 00:05:00,6906.63,140.40,TRADE,2023-01-01 00:05:00,1,1,2023,0,5
1,NSW1,2023/01/01 00:10:00,6854.28,133.96,TRADE,2023-01-01 00:10:00,1,1,2023,0,10
2,NSW1,2023/01/01 00:15:00,6856.32,120.00,TRADE,2023-01-01 00:15:00,1,1,2023,0,15
3,NSW1,2023/01/01 00:20:00,6796.27,118.99,TRADE,2023-01-01 00:20:00,1,1,2023,0,20
4,NSW1,2023/01/01 00:25:00,6805.55,118.99,TRADE,2023-01-01 00:25:00,1,1,2023,0,25
...,...,...,...,...,...,...,...,...,...,...,...
8923,NSW1,2023/01/31 23:40:00,7368.45,125.66,TRADE,2023-01-31 23:40:00,31,1,2023,23,40
8924,NSW1,2023/01/31 23:45:00,7306.86,120.06,TRADE,2023-01-31 23:45:00,31,1,2023,23,45
8925,NSW1,2023/01/31 23:50:00,7281.33,120.06,TRADE,2023-01-31 23:50:00,31,1,2023,23,50
8926,NSW1,2023/01/31 23:55:00,7300.71,120.06,TRADE,2023-01-31 23:55:00,31,1,2023,23,55


In [6]:
# Re-check the dtypes now that the datetime-derived columns have been added.
df.dtypes

REGION                    object
SETTLEMENTDATE            object
TOTALDEMAND              float64
RRP                      float64
PERIODTYPE                object
Date_Time         datetime64[ns]
Day                        int64
Month                      int64
Year                       int64
Hour                       int64
Minute                     int64
dtype: object

In [7]:
# Copy the demand and price columns under shorter names, coercing them to
# numeric; any value that cannot be parsed becomes NaN (errors='coerce').
# Reference: pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_numeric.html
for new_name, raw_name in (('Demand', 'TOTALDEMAND'), ('Price', 'RRP')):
    df[new_name] = pd.to_numeric(df[raw_name], errors='coerce')

In [9]:
# Keep only the engineered columns, in a fixed order. Later cells address
# columns by position (0..6), so the order is spelled out here instead of
# being whatever is left over after a drop. Selecting rather than dropping
# in place also means this cell can be re-run without raising a KeyError
# for columns that are already gone.
df = df[['Day', 'Month', 'Year', 'Hour', 'Minute', 'Demand', 'Price']].copy()
df

Unnamed: 0,Day,Month,Year,Hour,Minute,Demand,Price
0,1,1,2023,0,5,6906.63,140.40
1,1,1,2023,0,10,6854.28,133.96
2,1,1,2023,0,15,6856.32,120.00
3,1,1,2023,0,20,6796.27,118.99
4,1,1,2023,0,25,6805.55,118.99
...,...,...,...,...,...,...,...
8923,31,1,2023,23,40,7368.45,125.66
8924,31,1,2023,23,45,7306.86,120.06
8925,31,1,2023,23,50,7281.33,120.06
8926,31,1,2023,23,55,7300.71,120.06


## Transform the Data

In [10]:
# One empty list per feature column (x0..x6) plus one for the targets (y).
# A generator is used so that every name is bound to its own list object.
x0, x1, x2, x3, x4, x5, x6, y = ([] for _ in range(8))

In [11]:
# Build sliding windows: each sample holds WINDOW consecutive rows of one
# column, and the target is the column-6 value of the row that immediately
# follows the window.
WINDOW = 48  # rows per window (the rows shown above are 5 minutes apart)

# Start from fresh lists so that re-running this cell does not append a
# second copy of every window onto lists left over from a previous run.
x0, x1, x2, x3, x4, x5, x6 = ([] for _ in range(7))
y = []

for start in range(len(df) - WINDOW):
    stop = start + WINDOW
    # Column position `col` feeds list x<col>.
    for col, windows in enumerate((x0, x1, x2, x3, x4, x5, x6)):
        windows.append(df.iloc[start:stop, col])
    y.append(df.iloc[stop, 6])
    

In [12]:
# Sanity check on the container type holding the column-5 windows.
type(x5)

list

In [13]:
# Preview the first 20 rows of the frame the windows were cut from.
df.head(20)

Unnamed: 0,Day,Month,Year,Hour,Minute,Demand,Price
0,1,1,2023,0,5,6906.63,140.4
1,1,1,2023,0,10,6854.28,133.96
2,1,1,2023,0,15,6856.32,120.0
3,1,1,2023,0,20,6796.27,118.99
4,1,1,2023,0,25,6805.55,118.99
5,1,1,2023,0,30,6739.32,107.55
6,1,1,2023,0,35,6712.64,118.99
7,1,1,2023,0,40,6704.01,117.02
8,1,1,2023,0,45,6682.31,109.5
9,1,1,2023,0,50,6655.04,110.12


In [14]:
# Convert every per-feature window list, and the target list, to a NumPy array.
x0, x1, x2, x3, x4, x5, x6, y = map(np.array, (x0, x1, x2, x3, x4, x5, x6, y))

In [15]:
# Shape of the target array before it is reshaped into a column.
y.shape

(8880,)

In [16]:
# Turn the targets into a single-column 2-D array: one row per sample.
n_targets = len(y)
y = np.reshape(y, (n_targets, 1))
y.shape

(8880, 1)

In [17]:
from sklearn.preprocessing import MinMaxScaler

# Scale every feature matrix to [0, 1]. Each matrix gets its own scaler so
# the fitted parameters are kept; re-fitting one shared scaler for all eight
# arrays would discard every fit except the last one.
# NOTE(review): MinMaxScaler works column-wise, so each window position is
# scaled with its own min/max, and the fit uses all samples. If a train/test
# split follows, consider fitting on the training portion only.
feature_scalers = [MinMaxScaler(feature_range=(0, 1)) for _ in range(7)]
x0, x1, x2, x3, x4, x5, x6 = (
    fitted.fit_transform(block)
    for fitted, block in zip(feature_scalers, (x0, x1, x2, x3, x4, x5, x6))
)

# `scaler` is fitted on the target only, so scaler.inverse_transform maps
# scaled targets back to original units.
scaler = MinMaxScaler(feature_range=(0, 1))
y = scaler.fit_transform(y)

In [18]:
# Display the column-5 windows after scaling (values now lie in [0, 1]).
x5

array([[0.37304588, 0.36509578, 0.36540558, ..., 0.2167546 , 0.21750329,
        0.21908268],
       [0.36509578, 0.36540558, 0.35628612, ..., 0.21750329, 0.21908268,
        0.21220626],
       [0.36540558, 0.35628612, 0.35769543, ..., 0.21908268, 0.21220626,
        0.22493098],
       ...,
       [0.66393463, 0.66697647, 0.65258276, ..., 0.44306602, 0.44317992,
        0.43382659],
       [0.66697647, 0.65258276, 0.64663727, ..., 0.44317992, 0.43382659,
        0.42994949],
       [0.65258276, 0.64663727, 0.64593869, ..., 0.43382659, 0.42994949,
        0.43289262]])

In [19]:
# One row per window, one column per position inside the window.
x5.shape

(8880, 48)

In [20]:
# Combine the seven per-feature matrices along a new third axis.
feature_blocks = [x0, x1, x2, x3, x4, x5, x6]
X = np.stack(feature_blocks, axis=2)

In [21]:
# Axes are (sample, position in window, feature) given the axis=2 stack above.
X.shape

(8880, 48, 7)

## Build the LSTM Model