In [None]:
# Uncomment the command below if openpyxl is not installed
# (pandas needs it to read .xlsx files).
# %pip install openpyxl

In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
# Load the raw market data workbook into a DataFrame.
market_data = pd.read_excel(io="../../raw_data/market_data.xlsx")

### Missing Values

In [4]:
# Count the missing entries in every column.
market_data.isna().sum()

Time (UTC+10)                                             0
Regions NSW Trading Price ($/MWh)                         0
Regions SA Trading Price ($/MWh)                          0
Regions TAS Trading Price ($/MWh)                         0
Regions VIC Trading Price ($/MWh)                         0
Regions NSW Trading Total Intermittent Generation (MW)    0
Regions SA Trading Total Intermittent Generation (MW)     0
Regions TAS Trading Total Intermittent Generation (MW)    0
Regions VIC Trading Total Intermittent Generation (MW)    0
Regions NSW Operational Demand (MW)                       0
Regions SA Operational Demand (MW)                        0
Regions TAS Operational Demand (MW)                       0
Regions VIC Operational Demand (MW)                       0
dtype: int64

> No missing values were found in any column.

### Mandatory Task: Training and Test Sets

In [5]:
# Convert the timestamp column with pandas' datetime parser so that it can be
# compared against date strings in later cells.
# NOTE: timestamps are in 'YYYY-MM-DD HH:mm:ss' format.
market_data['Time (UTC+10)'] = market_data['Time (UTC+10)'].pipe(pd.to_datetime)

In [6]:
# Display the row whose timestamp is exactly 2020-07-01 15:00:00.
market_data.loc[market_data['Time (UTC+10)'].eq("2020-07-01 15:00:00")]

Unnamed: 0,Time (UTC+10),Regions NSW Trading Price ($/MWh),Regions SA Trading Price ($/MWh),Regions TAS Trading Price ($/MWh),Regions VIC Trading Price ($/MWh),Regions NSW Trading Total Intermittent Generation (MW),Regions SA Trading Total Intermittent Generation (MW),Regions TAS Trading Total Intermittent Generation (MW),Regions VIC Trading Total Intermittent Generation (MW),Regions NSW Operational Demand (MW),Regions SA Operational Demand (MW),Regions TAS Operational Demand (MW),Regions VIC Operational Demand (MW)
43806,2020-07-01 15:00:00,33.71,19.69,25.11,20.7,79.79,268.63,102.42,335.54,7779,1145,1189,5101


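> Checkpoint 2: the spot price at 01/07/2020 15:00 is 55.93 ($/MWh).
>
> **TODO (review):** the row displayed above lists regional prices of 33.71 (NSW), 19.69 (SA), 25.11 (TAS) and 20.7 (VIC) $/MWh, none of which equals 55.93 — confirm which value this checkpoint refers to.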

In [5]:
# TRAINING SET
# The spec defines the training period as 01/01/2018 to 30/06/2021.
# The bounds previously used here (2018-07-17 to 2021-07-18) contradicted the
# spec: they dropped the first months of data and extended past 01/07/2021,
# where the testing period begins, so test observations leaked into training.
train_start_period = '2018-01-01 00:00:00'
train_end_period   = '2021-06-30 23:30:00'

# Boolean mask over market_data rows: True where the timestamp lies inside the
# training period (both bounds inclusive).
train_period = (
    (market_data['Time (UTC+10)'] >= train_start_period)
    & (market_data['Time (UTC+10)'] <= train_end_period)
)

In [6]:
# Keep only the rows flagged by the training-period mask.
train_data = market_data[train_period]

In [7]:
# Show the first training row to check the start of the period.
train_data.iloc[:1]

Unnamed: 0,Time (UTC+10),Regions NSW Trading Price ($/MWh),Regions SA Trading Price ($/MWh),Regions TAS Trading Price ($/MWh),Regions VIC Trading Price ($/MWh),Regions NSW Trading Total Intermittent Generation (MW),Regions SA Trading Total Intermittent Generation (MW),Regions TAS Trading Total Intermittent Generation (MW),Regions VIC Trading Total Intermittent Generation (MW),Regions NSW Operational Demand (MW),Regions SA Operational Demand (MW),Regions TAS Operational Demand (MW),Regions VIC Operational Demand (MW)
0,2018-01-01,88.0,103.11,90.55,90.43,0.18,60.53,121.68,146.87,7100,1398,1091,4599


In [8]:
# Show the last training row to check the end of the period.
train_data.iloc[-1:]

Unnamed: 0,Time (UTC+10),Regions NSW Trading Price ($/MWh),Regions SA Trading Price ($/MWh),Regions TAS Trading Price ($/MWh),Regions VIC Trading Price ($/MWh),Regions NSW Trading Total Intermittent Generation (MW),Regions SA Trading Total Intermittent Generation (MW),Regions TAS Trading Total Intermittent Generation (MW),Regions VIC Trading Total Intermittent Generation (MW),Regions NSW Operational Demand (MW),Regions SA Operational Demand (MW),Regions TAS Operational Demand (MW),Regions VIC Operational Demand (MW)
61295,2021-06-30 23:30:00,80.41,88.37,71.57,73.85,3.41,6.33,81.11,190.95,8654,1587,1231,5487


In [9]:
# TESTING SET
# The spec defines the testing period as 01/07/2021 to 11/08/2021.
# NOTE(review): the lower bound is 00:30, while the spec's training period ends
# on 30/06/2021; a row stamped 2021-07-01 00:00:00 (if present) would belong to
# neither set — confirm this is intended.
test_start_period = '2021-07-01 00:30:00'
test_end_period   = '2021-08-11 23:30:00'

# Boolean mask over market_data rows: True where the timestamp lies inside the
# testing period (between() includes both bounds by default).
test_period = market_data['Time (UTC+10)'].between(test_start_period, test_end_period)

In [10]:
# Keep only the rows flagged by the testing-period mask.
test_data = market_data[test_period]

In [11]:
# Show the first testing row to check the start of the period.
test_data.iloc[:1]

Unnamed: 0,Time (UTC+10),Regions NSW Trading Price ($/MWh),Regions SA Trading Price ($/MWh),Regions TAS Trading Price ($/MWh),Regions VIC Trading Price ($/MWh),Regions NSW Trading Total Intermittent Generation (MW),Regions SA Trading Total Intermittent Generation (MW),Regions TAS Trading Total Intermittent Generation (MW),Regions VIC Trading Total Intermittent Generation (MW),Regions NSW Operational Demand (MW),Regions SA Operational Demand (MW),Regions TAS Operational Demand (MW),Regions VIC Operational Demand (MW)
61297,2021-07-01 00:30:00,190.22,96.47,99.43,90.51,5.26,4.92,77.71,175.53,8402,1679,1154,5192


In [12]:
# Show the last testing row to check the end of the period.
test_data.iloc[-1:]

Unnamed: 0,Time (UTC+10),Regions NSW Trading Price ($/MWh),Regions SA Trading Price ($/MWh),Regions TAS Trading Price ($/MWh),Regions VIC Trading Price ($/MWh),Regions NSW Trading Total Intermittent Generation (MW),Regions SA Trading Total Intermittent Generation (MW),Regions TAS Trading Total Intermittent Generation (MW),Regions VIC Trading Total Intermittent Generation (MW),Regions NSW Operational Demand (MW),Regions SA Operational Demand (MW),Regions TAS Operational Demand (MW),Regions VIC Operational Demand (MW)
63311,2021-08-11 23:30:00,54.42,49.82,11.29,51.37,146.67,242.53,167.19,296.36,7795,1571,1259,5332


In [13]:
# SAVE TRAIN AND TEST SET TO CSV
# Write each split to its own file under preprocessed_data. No index option is
# passed, so pandas' default applies and the row index is written as well.
train_data.to_csv(path_or_buf='../../preprocessed_data/mandatory_train.csv')
test_data.to_csv(path_or_buf='../../preprocessed_data/mandatory_test.csv')