In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
%matplotlib inline
from codecarbon import track_emissions
e_path = "/Users/ScottJeen/OneDrive - University of Cambridge/Admin/phd_emissions"
pd.set_option('display.max_columns', None)

In [2]:
import glob
import re
from datetime import datetime

path_jonluca = "/Users/ScottJeen/OneDrive - University of Cambridge/Research/Modelling/Emerson/jonluca_data/*.csv"
dfs = []

for fname in glob.glob(path_jonluca):
    df = pd.read_csv(fname, header=1)
    df = df.drop('Status',axis=1)
    
    # get feature name from path 
    column_name = re.findall("[A-Z][A-Z].*\.",fname)

    # rename features
    df = df.rename(columns={df.columns[0]: 'Datetime', df.columns[1]: column_name[0]})
    
    # format datatime
    datetime_format = '%b %d, %Y %H:%M:%S %p'
    df['Datetime'] = pd.to_datetime(df['Datetime'], format=datetime_format)
    df = df.set_index('Datetime')
    
    # remove multiple entries at each timestep
    df = df[~df.index.duplicated(keep='first')]

    dfs.append(df)

In [3]:
# join columns on datatime and sort alphabetically
data = dfs[0].join(dfs[1:], how='inner')
data = data.sort_index(axis=1)

# drop faulty freezer temperature sensor feature
data = data.drop(['FREEZER SLAB TEMP DOOR INSIDE.'], axis=1)

# normalize humidity features
hum = data.columns.str.contains('HUMIDITY')
data.loc[:,hum] = data.loc[:,hum] / 100

# get power data from amps (power (kW) = amps * 600V / 1000)
amp = data.columns.str.contains('COMP AMP')
data.loc[:,amp] = data.loc[:,amp] * 600 / 1000

# rename columns
new_cols = pd.Series(data.columns).str.replace('AMP', 'POWER (kW)').str.replace('.','')
new_cols = list(new_cols)
data.columns = new_cols

# add total power feature
power_features = data.columns.str.contains('POWER')
data['TOTAL POWER (kW)'] = data.loc[:,power_features].sum(axis=1)

# add energy feature (assume power is constant for 3 minute period between datapoints)
data['TOTAL ENERGY (kWh)'] = data['TOTAL POWER (kW)'] * (60/3)

In [4]:
# create datetime convertor
def datetime_conv(df, hour_format='%H', date_format='%Y-%m-%d', hour='Hour', date='Date'):
    
    # format hour feature to padded 24h 
    df[hour] = df[hour] - 1
    df[hour] = df[hour].astype(str)
    df[hour] = df[hour].str.pad(width=2, side='left', fillchar='0')

    # convert to datetime
    df[date] = pd.to_datetime(df[date], format=date_format)
    df[hour] = pd.to_datetime(df[hour], format=hour_format)

    x = []

    for index, row in df.iterrows():
        d = row.loc[date].date()
        t = row.loc[hour].time()
        x.append(dt.datetime.combine(d, t))

    df['Datetime'] = pd.Series(x)
    
    # drop old date and time cols
    df = df.drop([hour, date], axis=1)
    
    # set index to datetime
    df = df.set_index('Datetime')
    
    return df

In [5]:
import datetime as dt

# import elec data
path_elec = '/Users/ScottJeen/OneDrive - University of Cambridge/Research/Modelling/Emerson/elec_data/*.csv'

dfs_elec = []
files = [fname for fname in glob.glob(path_elec)]

# read hourly price data
hourly_price = pd.read_csv(files[0], header=3)
hourly_price = hourly_price.drop(hourly_price.columns[6:], axis=1)

# run datetime convertor
hourly_price = datetime_conv(hourly_price)

# rename columns
cols = hourly_price.columns
new_cols = {cols[0]: 'PRICE ($/MWH)',\
            cols[1]: '1 HOUR PRICE PREDICT',\
            cols[2]: '2 HOUR PRICE PREDICT',\
            cols[3]: '3 HOUR PRICE PREDICT'
           }

hourly_price = hourly_price.rename(new_cols, axis=1)

In [6]:
# read elec supply data
hourly_supply = pd.read_csv(files[1])

# run datetime convertor
hourly_supply = datetime_conv(hourly_supply, date_format='%d/%m/%Y')

# rename columns
hourly_supply = hourly_supply.rename({'Total Output': "TOTAL SUPPLY_MW",\
                                     'NUCLEAR': 'NUCLEAR_MW',\
                                      'GAS': 'GAS_MW',\
                                      'HYDRO': 'HYDRO_MW',\
                                      'WIND': 'WIND_MW',\
                                      'SOLAR': 'SOLAR_MW',\
                                      'BIOFUEL': 'BIOFUEL_MW'
                                     },\
                                     axis=1)

In [7]:
# cache timeseries index
index = data.index

# merge jonluca and prices
data = data.merge(hourly_price,\
                  left_on=[data.index],\
                  right_on=[hourly_price.index],\
                  how='left'
                  ).set_index(index) # keep 3 minute datetime index
                
data = data.drop(['key_0'], axis=1)

# merge jonluca/prices and supply
data = data.merge(hourly_supply,\
                  left_on=[data.index],\
                  right_on=[hourly_supply.index],\
                  how='left'
                  ).set_index(index) # keep 3 minute datetime index

data = data.drop(['key_0'], axis=1)

data.interpolate(method='time', axis=0, inplace=True)


In [8]:
# create grid emission features
gas_intensity = 400 # kg/MWh

data['GRID EMISSION INTENSITY_kg/MWh'] = (data['GAS_MW'] / data['TOTAL SUPPLY_MW']) * gas_intensity
data['GRID EMISSIONS_kgs'] = data['GRID EMISSION INTENSITY_kg/MWh'] * (3/60) # 3 minute intervals

In [9]:
data.head()

Unnamed: 0_level_0,COOLER COMP POWER (kW) 1A,COOLER COMP POWER (kW) 1B,COOLER COMP POWER (kW) 2A,COOLER COMP POWER (kW) 2B,COOLER COMP POWER (kW) 3A,COOLER COMP POWER (kW) 3B,COOLER HUMIDITY FRONT,COOLER HUMIDITY LEFT,COOLER HUMIDITY REAR,COOLER HUMIDITY RIGHT,COOLER SLAB TEMP DOOR INSIDE LEFT,COOLER SLAB TEMP DOOR INSIDE RIGHT,COOLER SLAB TEMP DOOR OUTSIDE RIGHT,COOLER SOIL TEMP DOOR INSIDE LEFT,COOLER TEMP LEFT,COOLER TEMP RIGHT,FREEZER COMP POWER (kW) 4A,FREEZER COMP POWER (kW) 4B,FREEZER COMP POWER (kW) 5A,FREEZER COMP POWER (kW) 5B,FREEZER HUMIDITY LEFT,FREEZER HUMIDITY RIGHT,FREEZER SLAB TEMP DOOR OUTSIDE1,FREEZER SLAB TEMP DOOR OUTSIDE2,FREEZER SLAB TEMP FRONT LEFT,FREEZER SLAB TEMP REAR RIGHT,FREEZER SOIL TEMP DOOR INSIDE,FREEZER SOIL TEMP DOOR OUTSIDE1,FREEZER SOIL TEMP DOOR OUTSIDE2,FREEZER SOIL TEMP FRONT LEFT,FREEZER SOIL TEMP REAR RIGHT,FREEZER TEMP FRONT LEFT,FREEZER TEMP REAR RIGHT,GLYCOL PRESSURE RETURN,GLYCOL PRESSURE SUPPLY,GLYCOL TEMP RETURN,GLYCOL TEMP SUPPLY,TOTAL POWER (kW),TOTAL ENERGY (kWh),PRICE ($/MWH),1 HOUR PRICE PREDICT,2 HOUR PRICE PREDICT,3 HOUR PRICE PREDICT,NUCLEAR_MW,GAS_MW,HYDRO_MW,WIND_MW,SOLAR_MW,BIOFUEL_MW,TOTAL SUPPLY_MW,GRID EMISSION INTENSITY_kg/MWh,GRID EMISSIONS_kgs
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1
2020-07-15 10:57:00,4.806,3.24,4.65,4.83,0.012,4.674,0.6998,0.6678,0.9724,0.6802,7.91,8.21,16.86,8.74,3.03,2.76,0.006,10.806,11.226,0.006,0.6232,0.6232,11.72,11.72,-18.66,-16.88,15.44,17.69,12.52,9.04,9.55,-19.61,-19.22,1.67,1.84,10.34,10.36,44.256,885.12,,,,,,,,,,,,,
2020-07-15 11:00:00,4.686,3.246,4.596,4.818,0.012,4.65,0.7004,0.6682,0.9726,0.6792,7.89,8.25,16.79,8.7,2.99,2.72,0.012,10.158,11.364,0.012,0.6234,0.6234,11.7,11.7,-18.69,-16.78,15.41,17.69,12.5,9.04,9.51,-19.76,-19.2,1.67,1.84,10.32,10.36,43.554,871.08,16.81,16.75,16.79,16.8,11424.0,2970.0,4902.0,602.0,372.0,27.0,20297.0,58.530817,2.926541
2020-07-15 11:03:00,4.638,3.234,4.608,4.836,0.012,4.674,0.701,0.669,0.9704,0.6856,7.93,8.23,16.84,8.7,3.03,2.66,0.006,11.334,0.012,0.006,0.6222,0.6222,11.7,11.7,-18.69,-16.83,15.41,17.69,12.48,9.02,9.53,-19.76,-19.25,1.67,1.84,10.3,10.34,33.36,667.2,16.8035,16.743,16.785,16.7955,11419.1,2972.5,4896.8,601.0,372.55,27.3,20289.25,58.602462,2.930123
2020-07-15 11:06:00,4.776,3.228,4.65,4.758,0.012,4.686,0.701,0.6652,0.9726,0.693,7.95,8.23,16.86,8.74,2.95,2.66,0.012,10.932,0.012,0.006,0.6194,0.6194,11.7,11.7,-18.69,-16.81,15.44,17.69,12.48,9.02,9.53,-19.68,-19.3,1.67,1.84,10.32,10.34,33.072,661.44,16.797,16.736,16.78,16.791,11414.2,2975.0,4891.6,600.0,373.1,27.6,20281.5,58.674161,2.933708
2020-07-15 11:09:00,4.638,3.228,4.602,4.722,0.012,4.65,0.7008,0.6644,0.9714,0.6858,7.91,8.17,16.86,8.76,2.93,2.64,0.006,10.212,0.012,0.006,0.6172,0.6172,11.67,11.67,-18.66,-16.83,15.39,17.69,12.48,9.02,9.47,-19.58,-19.25,1.67,1.84,10.3,10.32,32.088,641.76,16.7905,16.729,16.775,16.7865,11409.3,2977.5,4886.4,599.0,373.65,27.9,20273.75,58.745915,2.937296


## Weather Data

In [10]:
import os

# get current working directory
owd = os.getcwd()

# change to weather data directory
os.chdir('/Users/ScottJeen/OneDrive - University of Cambridge/Research/Modelling/Emerson/weather_data')

# download weather data from command line
os.system('for year in `seq 2020 2020`;do for month in `seq 7 12`;do wget --content-disposition "https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID=51459&Year=${year}&Month=${month}&Day=14&timeframe=1&submit= Download+Data" ;done;done')

# change back to current directory
os.chdir(owd)

# import weather data
path_weath = '/Users/ScottJeen/OneDrive - University of Cambridge/Research/Modelling/Emerson/weather_data/*.csv'

files = [fname for fname in glob.glob(path_weath)]

# read monthly weather data
dfs_weath = []
for f in files:
    month = pd.read_csv(f, header=0)
    dfs_weath.append(month)

hourly_weath = pd.concat(dfs_weath)
hourly_weath = hourly_weath.sort_values(by=['Month', 'Day'])
hourly_weath = hourly_weath.rename({'Date/Time (LST)': 'Datetime',
                                    'Temp (°C)': 'OUTSIDE TEMP (oC)',
                                    'Dew Point Temp (°C)': 'OUTSIDE DEW POINT (oC)',
                                    'Rel Hum (%)': 'OUTSIDE HUMIDITY (%)',
                                    'Wind Spd (km/h)': 'WIND (km/h)',
                                    'Wind Dir (10s deg)': 'WIND DIR (DEGREES)',
                                    'Stn Press (kPa)': 'PRESSURE (kPa)'
                                   }, axis=1)

hourly_weath['Datetime'] = pd.to_datetime(hourly_weath['Datetime'])
hourly_weath = hourly_weath.set_index('Datetime')

hourly_weath = hourly_weath.drop([
    'Longitude (x)',
    'Latitude (y)',
    'Station Name',
    'Climate ID',
    'Year',
    'Month',
    'Day',
    'Time (LST)',
    'Temp Flag',
    'Dew Point Temp Flag',
    'Rel Hum Flag',
    'Precip. Amount (mm)',
    'Precip. Amount Flag',
    'Wind Dir Flag',
    'Wind Spd Flag',
    'Visibility (km)',
    'Visibility Flag',
    'Stn Press Flag',
    'Hmdx',
    'Hmdx Flag',
    'Wind Chill',
    'Wind Chill Flag',
    'Weather'
], axis=1)


In [11]:
# merge jonluca/weather
data = data.merge(hourly_weath,\
                  left_on=[data.index],\
                  right_on=[hourly_weath.index],\
                  how='left'
                  ).set_index(index) # keep 3 minute datetime index

data = data.drop(['key_0'], axis=1)
data.interpolate(method='time', axis=0, inplace=True)

In [22]:
data = data.iloc[1:]
data

Unnamed: 0_level_0,COOLER COMP POWER (kW) 1A,COOLER COMP POWER (kW) 1B,COOLER COMP POWER (kW) 2A,COOLER COMP POWER (kW) 2B,COOLER COMP POWER (kW) 3A,COOLER COMP POWER (kW) 3B,COOLER HUMIDITY FRONT,COOLER HUMIDITY LEFT,COOLER HUMIDITY REAR,COOLER HUMIDITY RIGHT,COOLER SLAB TEMP DOOR INSIDE LEFT,COOLER SLAB TEMP DOOR INSIDE RIGHT,COOLER SLAB TEMP DOOR OUTSIDE RIGHT,COOLER SOIL TEMP DOOR INSIDE LEFT,COOLER TEMP LEFT,COOLER TEMP RIGHT,FREEZER COMP POWER (kW) 4A,FREEZER COMP POWER (kW) 4B,FREEZER COMP POWER (kW) 5A,FREEZER COMP POWER (kW) 5B,FREEZER HUMIDITY LEFT,FREEZER HUMIDITY RIGHT,FREEZER SLAB TEMP DOOR OUTSIDE1,FREEZER SLAB TEMP DOOR OUTSIDE2,FREEZER SLAB TEMP FRONT LEFT,FREEZER SLAB TEMP REAR RIGHT,FREEZER SOIL TEMP DOOR INSIDE,FREEZER SOIL TEMP DOOR OUTSIDE1,FREEZER SOIL TEMP DOOR OUTSIDE2,FREEZER SOIL TEMP FRONT LEFT,FREEZER SOIL TEMP REAR RIGHT,FREEZER TEMP FRONT LEFT,FREEZER TEMP REAR RIGHT,GLYCOL PRESSURE RETURN,GLYCOL PRESSURE SUPPLY,GLYCOL TEMP RETURN,GLYCOL TEMP SUPPLY,TOTAL POWER (kW),TOTAL ENERGY (kWh),PRICE ($/MWH),1 HOUR PRICE PREDICT,2 HOUR PRICE PREDICT,3 HOUR PRICE PREDICT,NUCLEAR_MW,GAS_MW,HYDRO_MW,WIND_MW,SOLAR_MW,BIOFUEL_MW,TOTAL SUPPLY_MW,GRID EMISSION INTENSITY_kg/MWh,GRID EMISSIONS_kgs,OUTSIDE TEMP (oC),OUTSIDE DEW POINT (oC),OUTSIDE HUMIDITY (%),WIND DIR (DEGREES),WIND (km/h),PRESSURE (kPa)
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1
2020-07-15 11:00:00,4.686,3.246,4.596,4.818,0.012,4.650,0.7004,0.6682,0.9726,0.6792,7.89,8.25,16.79,8.70,2.99,2.72,0.012,10.158,11.364,0.012,0.6234,0.6234,11.70,11.70,-18.69,-16.78,15.41,17.69,12.50,9.04,9.51,-19.76,-19.20,1.67,1.84,10.32,10.36,43.554,871.08,16.8100,16.75000,16.790000,16.800000,11424.000000,2970.000000,4902.000000,602.000,372.000000,27.000000,20297.000000,58.530817,2.926541,25.900,16.400,55.000000,13.000000,17.000000,100.010000
2020-07-15 11:03:00,4.638,3.234,4.608,4.836,0.012,4.674,0.7010,0.6690,0.9704,0.6856,7.93,8.23,16.84,8.70,3.03,2.66,0.006,11.334,0.012,0.006,0.6222,0.6222,11.70,11.70,-18.69,-16.83,15.41,17.69,12.48,9.02,9.53,-19.76,-19.25,1.67,1.84,10.30,10.34,33.360,667.20,16.8035,16.74300,16.785000,16.795500,11419.100000,2972.500000,4896.800000,601.000,372.550000,27.300000,20289.250000,58.602462,2.930123,25.925,16.410,54.950000,13.200000,17.000000,100.009000
2020-07-15 11:06:00,4.776,3.228,4.650,4.758,0.012,4.686,0.7010,0.6652,0.9726,0.6930,7.95,8.23,16.86,8.74,2.95,2.66,0.012,10.932,0.012,0.006,0.6194,0.6194,11.70,11.70,-18.69,-16.81,15.44,17.69,12.48,9.02,9.53,-19.68,-19.30,1.67,1.84,10.32,10.34,33.072,661.44,16.7970,16.73600,16.780000,16.791000,11414.200000,2975.000000,4891.600000,600.000,373.100000,27.600000,20281.500000,58.674161,2.933708,25.950,16.420,54.900000,13.400000,17.000000,100.008000
2020-07-15 11:09:00,4.638,3.228,4.602,4.722,0.012,4.650,0.7008,0.6644,0.9714,0.6858,7.91,8.17,16.86,8.76,2.93,2.64,0.006,10.212,0.012,0.006,0.6172,0.6172,11.67,11.67,-18.66,-16.83,15.39,17.69,12.48,9.02,9.47,-19.58,-19.25,1.67,1.84,10.30,10.32,32.088,641.76,16.7905,16.72900,16.775000,16.786500,11409.300000,2977.500000,4886.400000,599.000,373.650000,27.900000,20273.750000,58.745915,2.937296,25.975,16.430,54.850000,13.600000,17.000000,100.007000
2020-07-15 11:12:00,4.722,3.228,4.614,4.758,0.012,4.686,0.7030,0.6642,0.9728,0.6886,7.93,8.19,16.88,8.74,2.93,2.60,0.012,10.902,0.012,0.012,0.6160,0.6160,11.67,11.67,-18.66,-16.83,15.41,17.69,12.50,9.02,9.53,-19.48,-19.27,1.67,1.84,10.30,10.32,32.958,659.16,16.7840,16.72200,16.770000,16.782000,11404.400000,2980.000000,4881.200000,598.000,374.200000,28.200000,20266.000000,58.817724,2.940886,26.000,16.440,54.800000,13.800000,17.000000,100.006000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-11-03 06:42:00,4.290,4.224,0.006,4.272,0.012,0.006,0.6336,0.6150,0.6678,0.6372,5.34,5.10,8.70,5.91,2.21,2.23,0.006,0.006,0.012,0.006,0.8258,0.8258,11.88,11.88,-18.99,-17.70,11.88,12.29,12.22,9.12,9.53,-19.43,-20.33,1.60,1.77,14.90,21.55,12.840,256.80,24.6960,30.79650,29.904333,30.151667,9293.283333,1475.333333,3946.416667,1551.650,21.933333,55.816667,16344.433333,36.106075,1.805304,5.860,1.510,73.366667,35.116667,14.416667,99.365333
2020-11-03 06:45:00,4.266,4.224,0.012,4.272,0.012,0.006,0.6342,0.6142,0.6680,0.6348,5.36,5.10,8.68,5.97,2.09,2.19,0.006,0.006,0.012,0.012,0.8282,0.8282,11.93,11.93,-18.97,-17.66,11.93,12.31,12.22,9.14,9.53,-19.27,-20.06,1.60,1.77,15.12,21.70,12.828,256.56,24.7450,30.78125,29.897500,30.162500,9293.375000,1483.000000,3950.375000,1542.125,23.500000,55.875000,16348.250000,36.285229,1.814261,5.850,1.475,73.250000,35.125000,14.375000,99.370000
2020-11-03 06:48:00,4.200,4.206,0.006,4.266,0.012,0.006,0.6352,0.6124,0.6698,0.6348,5.36,5.12,8.70,5.99,2.04,2.21,0.006,0.006,9.006,9.102,0.8286,0.8286,11.97,11.97,-19.02,-17.73,11.95,12.29,12.22,9.14,9.51,-19.17,-19.78,1.60,1.77,15.26,21.77,30.816,616.32,24.7940,30.76600,29.890667,30.173333,9293.466667,1490.666667,3954.333333,1532.600,25.066667,55.933333,16352.066667,36.464300,1.823215,5.840,1.440,73.133333,35.133333,14.333333,99.374667
2020-11-03 06:51:00,4.218,4.212,0.012,4.260,0.012,0.006,0.6354,0.6122,0.6700,0.6336,5.36,5.10,8.70,6.01,1.94,2.21,0.006,0.012,10.332,0.006,0.8270,0.8270,12.01,12.01,-18.99,-17.73,11.95,12.31,12.25,9.14,9.53,-19.38,-19.63,1.60,1.77,15.39,21.87,23.076,461.52,24.8430,30.75075,29.883833,30.184167,9293.558333,1498.333333,3958.291667,1523.075,26.633333,55.991667,16355.883333,36.643287,1.832164,5.830,1.405,73.016667,35.141667,14.291667,99.379333


In [32]:
# normalize the data
from sklearn.preprocessing import MinMaxScaler

# fit transform
transformer = MinMaxScaler()
transformer.fit(data)

data_norm = transformer.transform(data)
data_norm = pd.DataFrame(data_norm, columns=data.columns, index=data.index)
data_norm.head()

Unnamed: 0_level_0,COOLER COMP POWER (kW) 1A,COOLER COMP POWER (kW) 1B,COOLER COMP POWER (kW) 2A,COOLER COMP POWER (kW) 2B,COOLER COMP POWER (kW) 3A,COOLER COMP POWER (kW) 3B,COOLER HUMIDITY FRONT,COOLER HUMIDITY LEFT,COOLER HUMIDITY REAR,COOLER HUMIDITY RIGHT,COOLER SLAB TEMP DOOR INSIDE LEFT,COOLER SLAB TEMP DOOR INSIDE RIGHT,COOLER SLAB TEMP DOOR OUTSIDE RIGHT,COOLER SOIL TEMP DOOR INSIDE LEFT,COOLER TEMP LEFT,COOLER TEMP RIGHT,FREEZER COMP POWER (kW) 4A,FREEZER COMP POWER (kW) 4B,FREEZER COMP POWER (kW) 5A,FREEZER COMP POWER (kW) 5B,FREEZER HUMIDITY LEFT,FREEZER HUMIDITY RIGHT,FREEZER SLAB TEMP DOOR OUTSIDE1,FREEZER SLAB TEMP DOOR OUTSIDE2,FREEZER SLAB TEMP FRONT LEFT,FREEZER SLAB TEMP REAR RIGHT,FREEZER SOIL TEMP DOOR INSIDE,FREEZER SOIL TEMP DOOR OUTSIDE1,FREEZER SOIL TEMP DOOR OUTSIDE2,FREEZER SOIL TEMP FRONT LEFT,FREEZER SOIL TEMP REAR RIGHT,FREEZER TEMP FRONT LEFT,FREEZER TEMP REAR RIGHT,GLYCOL PRESSURE RETURN,GLYCOL PRESSURE SUPPLY,GLYCOL TEMP RETURN,GLYCOL TEMP SUPPLY,TOTAL POWER (kW),TOTAL ENERGY (kWh),PRICE ($/MWH),1 HOUR PRICE PREDICT,2 HOUR PRICE PREDICT,3 HOUR PRICE PREDICT,NUCLEAR_MW,GAS_MW,HYDRO_MW,WIND_MW,SOLAR_MW,BIOFUEL_MW,TOTAL SUPPLY_MW,GRID EMISSION INTENSITY_kg/MWh,GRID EMISSIONS_kgs,OUTSIDE TEMP (oC),OUTSIDE DEW POINT (oC),OUTSIDE HUMIDITY (%),WIND DIR (DEGREES),WIND (km/h),PRESSURE (kPa)
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1
2020-07-15 11:00:00,0.8506,0.569474,0.890698,0.915621,0.00119,0.856354,0.441509,0.541722,0.924351,0.314925,0.694323,0.639922,0.885279,0.773494,0.553009,0.471631,0.000452,0.831941,0.824837,0.000861,0.219644,0.219644,0.208955,0.208955,0.466981,0.732057,0.761404,0.8125,0.535714,0.32,0.729167,0.061127,0.14936,0.785714,0.75,0.037097,0.021429,0.706403,0.706403,0.146326,0.299742,0.300349,0.278952,0.838635,0.50979,0.861572,0.146199,0.871194,0.122172,0.72102,0.505468,0.505468,0.841096,0.77377,0.357143,0.361111,0.309091,0.620779
2020-07-15 11:03:00,0.841876,0.567368,0.893023,0.919042,0.00119,0.860773,0.44434,0.54702,0.918277,0.338806,0.703057,0.636008,0.890355,0.773494,0.56447,0.450355,0.0,0.928256,0.000436,0.00043,0.217157,0.217157,0.208955,0.208955,0.466981,0.708134,0.761404,0.8125,0.5,0.293333,0.770833,0.061127,0.146989,0.785714,0.75,0.033871,0.019841,0.541067,0.541067,0.146281,0.299636,0.300273,0.278888,0.837367,0.510235,0.859697,0.145955,0.872482,0.123529,0.720291,0.506122,0.506122,0.841781,0.774098,0.356429,0.366667,0.309091,0.620519
2020-07-15 11:06:00,0.866957,0.566316,0.901163,0.904219,0.00119,0.862983,0.44434,0.521854,0.924351,0.366418,0.707424,0.636008,0.892386,0.783133,0.541547,0.450355,0.000452,0.895332,0.000436,0.00043,0.211355,0.211355,0.208955,0.208955,0.466981,0.717703,0.766667,0.8125,0.5,0.293333,0.770833,0.064947,0.144618,0.785714,0.75,0.037097,0.019841,0.536395,0.536395,0.146235,0.29953,0.300197,0.278825,0.8361,0.51068,0.857823,0.145712,0.87377,0.124887,0.719562,0.506776,0.506776,0.842466,0.774426,0.355714,0.372222,0.309091,0.62026
2020-07-15 11:09:00,0.841876,0.566316,0.89186,0.897377,0.00119,0.856354,0.443396,0.516556,0.921038,0.339552,0.69869,0.624266,0.892386,0.787952,0.535817,0.443262,0.0,0.836364,0.000436,0.00043,0.206797,0.206797,0.186567,0.186567,0.481132,0.708134,0.757895,0.8125,0.5,0.293333,0.645833,0.069723,0.146989,0.785714,0.75,0.033871,0.018254,0.520436,0.520436,0.14619,0.299423,0.300121,0.278762,0.834833,0.511125,0.855948,0.145468,0.875059,0.126244,0.718832,0.507431,0.507431,0.843151,0.774754,0.355,0.377778,0.309091,0.62
2020-07-15 11:12:00,0.857143,0.566316,0.894186,0.904219,0.00119,0.862983,0.453774,0.515232,0.924903,0.35,0.703057,0.62818,0.894416,0.783133,0.535817,0.429078,0.000452,0.892875,0.000436,0.000861,0.20431,0.20431,0.186567,0.186567,0.481132,0.708134,0.761404,0.8125,0.535714,0.293333,0.770833,0.074499,0.146041,0.785714,0.75,0.033871,0.018254,0.534547,0.534547,0.146144,0.299317,0.300046,0.278698,0.833566,0.51157,0.854074,0.145224,0.876347,0.127602,0.718103,0.508086,0.508086,0.843836,0.775082,0.354286,0.383333,0.309091,0.61974


## Modelling the environment in tensorflow

In [13]:
# # example

# from numpy import sqrt
# from numpy import asarray
# from pandas import read_csv
# from tensorflow.keras import Sequential
# from tensorflow.keras.layers import Dense
# from tensorflow.keras.layers import LSTM
 
# # split a univariate sequence into samples
# def split_sequence(sequence, n_steps):
# 	X, y = list(), list()
# 	for i in range(len(sequence)):
# 		# find the end of this pattern
# 		end_ix = i + n_steps
# 		# check if we are beyond the sequence
# 		if end_ix > len(sequence)-1:
# 			break
# 		# gather input and output parts of the pattern
# 		seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
# 		X.append(seq_x)
# 		y.append(seq_y)
# 	return asarray(X), asarray(y)
 
# # load the dataset
# path = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/monthly-car-sales.csv'
# df = read_csv(path, header=0, index_col=0, squeeze=True)
# # retrieve the values
# values = df.values.astype('float32')
# # specify the window size
# n_steps = 5
# # split into samples
# X, y = split_sequence(values, n_steps)
# # reshape into [samples, timesteps, features]
# X = X.reshape((X.shape[0], X.shape[1], 1))
# # split into train/test
# n_test = 12
# X_train, X_test, y_train, y_test = X[:-n_test], X[-n_test:], y[:-n_test], y[-n_test:]
# print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
# # define model
# model = Sequential()
# model.add(LSTM(100, activation='relu', kernel_initializer='he_normal', input_shape=(n_steps,1)))
# model.add(Dense(50, activation='relu', kernel_initializer='he_normal'))
# model.add(Dense(50, activation='relu', kernel_initializer='he_normal'))
# model.add(Dense(1))
# # compile the model
# model.compile(optimizer='adam', loss='mse', metrics=['mae'])
# # fit the model
# model.fit(X_train, y_train, epochs=350, batch_size=32, verbose=2, validation_data=(X_test, y_test))
# # evaluate the model
# mse, mae = model.evaluate(X_test, y_test, verbose=0)
# print('MSE: %.3f, RMSE: %.3f, MAE: %.3f' % (mse, sqrt(mse), mae))
# # make a prediction
# row = asarray([18024.0, 16722.0, 14385.0, 21342.0, 17180.0]).reshape((1, n_steps, 1))
# yhat = model.predict(row)
# print('Predicted: %.3f' % (yhat))