In [1]:
# Script Name: Combination of Temperature and Energy Demand
# Author: Rahul Kumar
# Date: 4/3/22
# Description: Combines the temperature and energy demand data into a single, unified dataset
# that can be used for further analysis

import pandas as pd
import numpy as np
from math import sqrt
import seaborn as sns
import matplotlib.pyplot as plt

# Lift pandas' display limits so a displayed dataframe shows every row and
# every column instead of eliding the middle with "...".
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

In [2]:
# File extension shared by every input and output file.
ext = '.csv'

# Lower-case state codes; they are used to build the per-state input file names.
states = ['nsw','qld','sa','vic']

# Folder holding the source CSVs. To run the notebook yourself, point this at
# wherever you keep the files (here they live outside the GitHub clone).
filepath = '../../UNSW-PROJECT-DATA/H06-2021/'

# Folder the combined CSVs are written to. They are stored locally first and
# then copied to the server.
outpath = '../../UNSW-PROJECT-DATA/'

In [3]:
# Dataset prefixes used in this notebook: 'temprature', 'totaldemand', 'forecastdemand'

# Temperature files are read as '<filepath>temprature_<state>.csv'. The prefix is
# intentionally spelled 'temprature': rename your NSW temperature file to match.
filename = 'temprature'

# Read one frame per state and concatenate them once at the end. Seeding the
# result with an empty DataFrame and concatenating inside the loop re-copies
# every accumulated row on each pass and relies on concat's handling of empty
# frames, which recent pandas versions warn about.
state_frames = []
for state in states:
    df = pd.read_csv(filepath+filename+'_'+state+ext)
    df['STATE'] = state.upper()                      # tag rows with the upper-case state code
    df['DATETIME'] = pd.to_datetime(df['DATETIME'])  # parse timestamps so later merges compare datetimes
    state_frames.append(df)

temp = pd.concat(state_frames, ignore_index=True)

# Keep only the columns used downstream, in a fixed order, and save the result.
temp = temp[['STATE','LOCATION','DATETIME','TEMPERATURE']]
temp.to_csv(outpath+filename+ext)

In [4]:
# Total demand files are read as '<filepath>totaldemand_<state>.csv'.
filename = 'totaldemand'

# Read one frame per state and concatenate them once at the end, rather than
# growing an initially empty DataFrame with pd.concat inside the loop (which
# re-copies all rows on every pass and triggers pandas' empty-frame concat warning).
state_frames = []
for state in states:
    df = pd.read_csv(filepath+filename+'_'+state+ext)
    df['STATE'] = state.upper()                      # tag rows with the upper-case state code
    df['DATETIME'] = pd.to_datetime(df['DATETIME'])  # parse timestamps so later merges compare datetimes
    state_frames.append(df)

demand = pd.concat(state_frames, ignore_index=True)

# Keep only the columns used downstream (any other column in the files is
# dropped here) and save the result.
demand = demand[['STATE','DATETIME','TOTALDEMAND']]
demand.to_csv(outpath+filename+ext)

In [5]:
# Forecast demand files are read as '<filepath>forecastdemand_<state>.csv'.
# Each DATETIME can appear on several rows, so the forecasts are averaged down
# to a single FORECASTDEMAND value per DATETIME before the states are combined.
filename = 'forecastdemand'

state_frames = []
for state in states:
    df = pd.read_csv(filepath+filename+'_'+state+ext)
    # Average only FORECASTDEMAND. Calling .mean() on the whole grouped frame
    # tries to average every other column as well, which raises a TypeError on
    # pandas >= 2.0 as soon as one of them is non-numeric.
    # NOTE(review): the remaining columns of these files are not visible here - confirm.
    df = df.groupby('DATETIME', as_index=False)['FORECASTDEMAND'].mean()
    df['STATE'] = state.upper()
    df['DATETIME'] = pd.to_datetime(df['DATETIME'])
    df = df[['STATE','FORECASTDEMAND','DATETIME']]
    state_frames.append(df)

# Single concat instead of growing an initially empty DataFrame inside the loop.
fdemand = pd.concat(state_frames, ignore_index=True)

# Saved with a 'mean_' prefix to mark that the forecasts were averaged.
fdemand.to_csv(outpath+'mean_'+filename+ext)

In [6]:
# Row counts per (STATE, DATETIME) recorded when this notebook was written:
#   DF   - 418,012   (presumably the averaged forecast demand - TODO confirm)
#   Temp - 778,177
#   TD   - 786,051
# Total demand has the most rows, so it is used as the left-hand table.
# Both joins are outer joins, so rows that exist in only one table are kept
# (with missing values in the other table's columns).

filename = 'combined'
merge_keys = ['STATE','DATETIME']

# Demand + temperature.
combined = demand.merge(temp, on=merge_keys, how='outer')

# Demand + temperature + averaged forecast demand.
combined_fdavg = combined.merge(fdemand, on=merge_keys, how='outer')

# Based on the Climate Glossary - http://www.bom.gov.au/climate/glossary/seasons.shtml the seasons are broken down as
# follows: 9,10,11 are Spring, 12,1,2 are Summer, 3,4,5 are Autumn and 6,7,8 are Winter

def Season(item):
    """Map a month number to its season name.

    Months 12, 1, 2 -> 'Summer'; 3, 4, 5 -> 'Autumn'; 6, 7, 8 -> 'Winter'.
    Every other value (i.e. 9, 10, 11, but also anything unexpected) falls
    through to 'Spring'.
    """
    if item in (12, 1, 2):
        return 'Summer'
    if item in (3, 4, 5):
        return 'Autumn'
    if item in (6, 7, 8):
        return 'Winter'
    return 'Spring'
    

# Add the same calendar columns to both combined tables: weekday name,
# quarter, month number and the season derived from the month.
for frame in (combined, combined_fdavg):
    frame['DATETIME'] = pd.to_datetime(frame['DATETIME'])
    timestamps = frame['DATETIME'].dt
    frame['Weekday'] = timestamps.day_name()
    frame['Quarter'] = timestamps.quarter
    frame['Month'] = timestamps.month
    frame['Season'] = frame['Month'].apply(Season)





In [7]:
# https://meteostat.net/en/blog/obtain-weather-data-any-location-python

# Meteostat (linked above) could potentially be used to fill in any missing temperature readings

In [8]:
# Sunrise and sunset times

# Declare the `sunrise` table with its column order (STATE, SUNRISE, SUNSET);
# the sunrise/sunset files are loaded into it further down in this cell.
sunrise = pd.DataFrame(columns=['STATE','SUNRISE','SUNSET'])

def hour_split(number):
    """Return the hour part of a clock time written as HMM or HHMM.

    Everything except the last two digits is the hour, e.g. 545 -> 5 and
    1958 -> 19. The value may be an int or a digit string such as '0545'.

    The hour is returned as an int, matching min_split. A value with no hour
    digits (e.g. 45, meaning 00:45) gives 0 instead of an empty string, which
    could not be assembled into a timestamp.
    """
    return int(number) // 100

def min_split(number):
    """Return the minute part of a clock time written as HMM or HHMM.

    The last two characters of the value's text form are taken as the
    minutes and returned as an int, e.g. 545 -> 45 and 1958 -> 58.
    """
    return int(str(number)[-2:])

# One sunrise/sunset CSV per state and year, read from
# '<filepath2><STATE>/times of sunrise and sunset annual results <year>.csv'.
filepath2 = '../../UNSW-PROJECT-DATA/Sunrise Sunset Data/'
STATE2 = ['NSW', 'QLD', 'SA', 'VIC']
filename = 'times of sunrise and sunset annual results '
years = ['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021']

# Collect one frame per (state, year) and concatenate once at the end instead
# of growing `sunrise` with pd.concat on every iteration.
yearly_frames = []
for state in STATE2:
    for year in years:
        df = pd.read_csv(filepath2+state+'/'+filename+year+ext)
        df['year'] = int(year)
        # 'rise' and 'set' hold clock times as HMM/HHMM values; split them into
        # hour and minute parts so full timestamps can be assembled below.
        df['risehr'] = df['rise'].apply(hour_split)
        df['risemn'] = df['rise'].apply(min_split)
        df['sethr'] = df['set'].apply(hour_split)
        df['setmn'] = df['set'].apply(min_split)
        df['SUNRISE']= pd.to_datetime(dict(year=df.year, month=df.month, day = df.day, hour = df.risehr, minute = df.risemn))
        df['SUNSET']= pd.to_datetime(dict(year=df.year, month=df.month, day = df.day, hour = df.sethr, minute = df.setmn))
        # .assign builds a new frame; assigning a column directly onto the
        # two-column selection raises SettingWithCopyWarning.
        df = df[['SUNRISE','SUNSET']].assign(STATE=state)
        yearly_frames.append(df)

# Column order is fixed explicitly so the saved CSV keeps the STATE, SUNRISE, SUNSET layout.
sunrise = pd.concat(yearly_frames, ignore_index=True)[['STATE','SUNRISE','SUNSET']]

sunrise.to_csv('../data/'+'sunrise_sunset_combined'+ext)

In [9]:
# Break the timestamps into day / year parts so the demand tables can be
# matched to the sunrise table by calendar date.
for frame in (combined, combined_fdavg):
    frame['Day'] = frame['DATETIME'].dt.day
    frame['Year'] = frame['DATETIME'].dt.year

# The sunrise table takes its date parts from the SUNRISE timestamp; it also
# needs Month, which the demand tables already have.
sunrise_stamp = sunrise['SUNRISE'].dt
sunrise['Day'] = sunrise_stamp.day
sunrise['Year'] = sunrise_stamp.year
sunrise['Month'] = sunrise_stamp.month

In [10]:
# Attach each row's sunrise and sunset by matching on calendar date and state.
# Inner join: rows without a matching sunrise/sunset entry are dropped.
date_state_keys = ['Day','Month','Year','STATE']
combined = combined.merge(sunrise, on=date_state_keys, how='inner')
combined_fdavg = combined_fdavg.merge(sunrise, on=date_state_keys, how='inner')

In [11]:
# Label every reading 'Day' when its timestamp lies between sunrise and sunset
# (both ends inclusive) and 'Night' in every other case.
for frame in (combined, combined_fdavg):
    after_sunrise = frame['DATETIME'] >= frame['SUNRISE']
    before_sunset = frame['DATETIME'] <= frame['SUNSET']
    frame['DAYTYPE'] = np.where(after_sunrise & before_sunset, 'Day', 'Night')

In [12]:
# Save both combined tables to the output folder: 'combined.csv' and
# 'combined_fd_Avg.csv' (the variant that includes the averaged forecast demand).
filename = 'combined'
base_path = outpath + filename
combined.to_csv(base_path + ext)
combined_fdavg.to_csv(base_path + '_fd_Avg' + ext)

In [13]:
# Don't remove: displays the number of missing values in each column of `combined`.
combined.isna().sum()

STATE              0
DATETIME           0
TOTALDEMAND    56756
LOCATION       64643
TEMPERATURE    64643
Weekday            0
Quarter            0
Month              0
Season             0
Day                0
Year               0
SUNRISE            0
SUNSET             0
DAYTYPE            0
dtype: int64

In [14]:
# Displays the number of missing values in each column of `combined_fdavg`.
combined_fdavg.isna().sum()

STATE                  0
DATETIME               0
TOTALDEMAND        58220
LOCATION           66107
TEMPERATURE        66107
FORECASTDEMAND    426259
Weekday                0
Quarter                0
Month                  0
Season                 0
Day                    0
Year                   0
SUNRISE                0
SUNSET                 0
DAYTYPE                0
dtype: int64