# Greenhouse Model

## Import dependencies and data

In [1]:
# import dependencies
import os
import pickle
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model

In [2]:
# define relevant paths
# The project root is taken to be the parent of the current working directory.
home_path = os.path.dirname(os.getcwd())
# Paths are built with os.path.join so they work with any OS path separator.
# The trailing '' keeps a trailing separator on each path, because later cells
# build file names by plain string concatenation (e.g. data_path + 'file.csv').
data_path = os.path.join(home_path, 'data', '')
plot_path = os.path.join(home_path, 'plotting', 'plots', '')
save_path = os.path.join(home_path, 'model', 'saved', '')
results_path = os.path.join(home_path, 'model', 'results', '')

In [3]:
# get merged data
# The first two CSV rows are read as a two-level column header and the first
# five columns as a five-level row index. Later cells address columns by
# 2-tuples and group rows by the index levels named 'month' and 'day'.
data = pd.read_csv(
    data_path + 'data_processed.csv',
    header=[0, 1],
    index_col=[0, 1, 2, 3, 4]
)

# convert index.date col to datetime
# (disabled: the two lines below are kept for reference only and do not run)
#data.index = pd.to_datetime(data.index.values)
#data.loc[:, ('time', 'date')] = pd.to_datetime(data.time.date)

In [4]:
# print number of NAs
# data.isna().sum() counts the missing values separately for every column.
print('Number of NAs\n')
print(data.isna().sum())
print('\n\n')

Number of NAs

category  sensor_ID              
flow      TA01_GP101                 0
          TA02_GP101                 0
state     TA01_output                0
          TA02_output                0
power     phase                      0
                                    ..
humidity  TA01_GT401_GM401_scaled    0
          TA02_GT401_GM401_scaled    0
          outdoor_scaled             0
          TA_inflow                  0
          TA_inflow_out              0
Length: 68, dtype: int64





## MISC

In [5]:
def abs_humid(temp, rel_humid):
    """Convert a relative humidity into an absolute humidity.

    Evaluates ``6.112 * exp(17.67 * T / (T + 243.5)) * RH * 2.1674 / (273.15 + T)``.
    Works element-wise on NumPy arrays as well as on scalars.

    NOTE(review): the constants look like the usual Magnus-type approximation,
    which would mean temp in deg C, rel_humid in percent and a result in g/m^3
    -- confirm against the data source.

    Parameters
    ----------
    temp : float or array-like
        Temperature.
    rel_humid : float or array-like
        Relative humidity.

    Returns
    -------
    float or ndarray
        Absolute humidity for the given temperature / relative humidity.
    """
    # temperature-dependent exponential factor of the formula
    saturation_term = 6.112 * np.exp(17.67 * temp / (temp + 243.5))
    return saturation_term * rel_humid * 2.1674 / (273.15 + temp)

def rel_humid(temp, abs_humid):
    """Convert an absolute humidity into a relative humidity.

    Evaluates ``AH * (273.15 + T) / (6.112 * exp(17.67 * T / (T + 243.5)) * 2.1674)``,
    i.e. the same relation used for the forward conversion, solved for the
    relative humidity. Works element-wise on NumPy arrays as well as on scalars.

    Parameters
    ----------
    temp : float or array-like
        Temperature.
    abs_humid : float or array-like
        Absolute humidity (the parameter name shadows the module-level
        function of the same name inside this body).

    Returns
    -------
    float or ndarray
        Relative humidity for the given temperature / absolute humidity.
    """
    numerator = abs_humid * (273.15 + temp)
    denominator = 6.112 * np.exp(17.67 * temp / (temp + 243.5)) * 2.1674
    return numerator / denominator

In [6]:
# NOTE(review): presumably lower / upper bounds for the TA01 and TA02 units
# (cf. the 'TA01_output' / 'TA02_output' state columns). They are not used in
# the cells visible here -- confirm meaning and units.
ta01_min = 35
ta01_max = 75

ta02_min = 0
ta02_max = 85

## Split TRAIN / TEST

In [22]:
## Select days for training and testing
# NOTE: shuffling of the days is currently disabled (see below), so the split
# follows order of appearance: the first `train_frac` of the days go to
# training and the remaining days go to testing.
days = data.groupby(['month', 'day'], sort=False).count().index.values

# get number of days f. testing / training
train_frac = 0.95
train_n = int(len(days) * train_frac)
test_n = len(days) - train_n

# split dataset
#np.random.shuffle(days)
# Build the set of training days once: membership is then O(1) per row instead
# of rebuilding and scanning a list for every row of `data`.
train_days = set(days[:train_n])
# Dropping the last three index levels leaves the two leading levels per row,
# which are compared against the (month, day) keys collected above.
row_days = data.index.droplevel(-1).droplevel(-1).droplevel(-1).values
mask = np.array([day in train_days for day in row_days], dtype=bool)
data_train = data.loc[mask].copy()
data_test = data.loc[~mask].copy()

# del data

### Reduce to M-min observations

In [23]:
# number of minutes aggregated into one observation: the 'minute' index level
# is integer-divided by m before averaging
m = 5

In [24]:
def data_reduce(data, m):
    """Average observations into buckets of ``m`` on the fourth index level.

    The first four index levels are taken as (month, day, hour, minute). The
    fourth level is integer-divided by ``m`` so that all rows falling into the
    same bucket share one key; rows with identical keys are then averaged.
    Any further index levels are discarded.

    The input frame is left untouched (the previous implementation overwrote
    ``data.index`` on the caller's object).

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a MultiIndex of at least four levels.
    m : int
        Bucket width applied to the fourth index level.

    Returns
    -------
    pandas.DataFrame
        Bucket means, indexed by ('month', 'day', 'hour', 'minute') in order of
        first appearance. The 'minute' level holds the bucket number
        (original value // m), not the original minute.
    """
    index = pd.MultiIndex.from_arrays(
        [
            data.index.get_level_values(0),
            data.index.get_level_values(1),
            data.index.get_level_values(2),
            data.index.get_level_values(3) // m,
        ],
        names=['month', 'day', 'hour', 'minute']
    )

    # A shallow copy gets its own index attribute while sharing the values,
    # so re-indexing it does not modify the caller's frame.
    reduced = data.copy(deep=False)
    reduced.index = index
    return reduced.groupby(['month', 'day', 'hour', 'minute'], sort=False).mean()

In [25]:
# Aggregate both splits to m-minute means.
# NOTE: this rebinds data_train / data_test, so the cell is not idempotent --
# running it again would integer-divide the 'minute' level by m a second time.
data_train = data_reduce(data_train, m)
data_test = data_reduce(data_test, m)

### Normalization

In [26]:
# Define model variables
# Each entry is a (top-level, second-level) column key of the two-level column
# index. Commented-out entries are columns that are currently excluded.
# 16 entries are active; two of them are popped off as targets in a later
# cell, which leaves 14 input features per time step.
model_vars = [
#     ('temperatures', 'TA_inflow'),
#     ('humidity', 'TA_inflow'),
#     ('temperatures', 'TA_inflow_out'),
#     ('humidity', 'TA_inflow_out'),
#     ('state', 'TA01_output'),
#     ('state', 'TA02_output'),
#     ('flow', 'TA01_GP101'),
#     ('flow', 'TA02_GP101'),
#     ('temperatures', 'DC_GT102_GM102'),
#     ('temperatures', 'DC_GT103_GM103'),
#     ('temperatures', 'DC_GT104_GM104'),
    ('temperatures', 'TA01_GT10X_GM10X'),
    ('temperatures', 'TA01_GT401_GM401_scaled'),
    ('temperatures', 'TA02_GT401_GM401_scaled'),
    ('temperatures', 'DC_GT301_damped_scaled'),
#     ('temperatures', 'DC_GT301_outdoor_scaled'),
#     ('temperatures', 'DC_GT401_GM401'),
    ('temperatures', 'TA01_GT401_GM401'),
#     ('temperatures', 'TA02_GT401_GM401'),
#     ('temperatures', 'DC_GT301_damped'),
    ('temperatures', 'DC_GT301_outdoor'),
#     ('humidity', 'TA01_GT10X_GM10X'),
    ('humidity', 'TA01_GT10X_GM10X_abs'),
    ('humidity', 'TA01_GT401_GM401_abs'),
    ('humidity', 'TA02_GT401_GM401_abs'),
    ('humidity', 'outdoor_abs'),
    ('humidity', 'TA01_GT401_GM401_scaled'),
#     ('humidity', 'TA02_GT401_GM401_scaled'),
    ('humidity', 'outdoor_scaled'),
    ('sun', 'gsi'),
#     ('sun', 'gsi_deriv'),
#     ('sun', 'vol'),
#     ('sun', 'vol_deriv'),
#     ('wind', 'Wx'),
#     ('wind', 'Wy'),
    ('power', 'phase'),
    ('time', 'minofday'),
    ('time', 'dayofyear')
]

# filter data
# Keep only the selected columns, in the order listed above; .copy() detaches
# the result so the in-place column assignments in later cells act on it alone.
data_train = data_train[model_vars].copy()
data_test = data_test[model_vars].copy()

In [27]:
# Standardize every column to zero mean / unit variance. The statistics are
# computed on the training split only and re-used for the test split, so no
# information from the test data enters the scaling. The per-column statistics
# are kept in col_params.
col_params = {}
for col in data_train.columns:
    train_col = data_train[col]

    # statistics of the un-normalized training column
    min_val, max_val = train_col.min(), train_col.max()
    mean, std = train_col.mean(), train_col.std()

    # normalize both splits with the training statistics
    data_train[col] = (train_col - mean) / std
    data_test[col] = (data_test[col] - mean) / std

    col_params[col] = dict(mean=mean, std=std, max=max_val, min=min_val)

## Data preparation

In [28]:
t_steps = 3    # 5 x 3 = 15-min predictions
n_steps = 12    # 5 x 12 = 60-min backwards look

In [29]:
# Columns the model predicts. They are removed (popped) from the feature
# frames, so they no longer appear among the input features afterwards.
# Previously considered alternatives (disabled):
#     ('temperatures', 'DC_GT102_GM102'),
#     ('temperatures', 'DC_GT103_GM103'),
#     ('temperatures', 'DC_GT104_GM104'),
target_cols = [
    ('temperatures', 'TA01_GT10X_GM10X'),
    ('humidity', 'TA01_GT10X_GM10X_abs'),
]

targets_train = pd.concat(
    [data_train.pop(col) for col in target_cols],
    axis=1,
)
targets_test = pd.concat(
    [data_test.pop(col) for col in target_cols],
    axis=1,
)

In [30]:
# Target values that are still "known" at prediction time: row j holds the
# targets at position j + n_steps - t_steps, i.e. t_steps rows before the
# target of sequence j (which sits at position j + n_steps). The slice has
# len(targets) - n_steps rows, the same as the number of sequences built later.
temps_train = targets_train.copy()[n_steps-t_steps:-t_steps]#.values
temps_test = targets_test.copy()[n_steps-t_steps:-t_steps]#.values

### Create sequences

In [31]:
def date_flagger(data, n_steps):
    """Return the start positions of sequences that span a time gap.

    For every row the value of index level 2 (the hour) is compared with the
    hour ``n_steps`` rows earlier. If the difference is larger than 1, the
    sequence starting ``n_steps`` rows earlier is flagged.

    The computation is done on a standalone Series; ``data`` is not modified
    (the previous implementation temporarily added and then deleted 'hour' and
    'date_flag' columns on the caller's frame, destroying any existing column
    with one of those names).

    NOTE(review): only positive hour jumps are detected. A gap that crosses
    midnight yields a negative difference and is not flagged -- confirm that
    this is acceptable for the data at hand.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose index level 2 holds the hour of each observation.
    n_steps : int
        Sequence length (look-back window) in rows.

    Returns
    -------
    numpy.ndarray
        Integer positions of the flagged sequences, counted from the first
        sequence (i.e. row position minus ``n_steps``).
    """
    hours = pd.Series(data.index.get_level_values(2).values)

    # The first n_steps rows have no predecessor: the shifted value is NaN and
    # the comparison evaluates to False, so they are never flagged.
    date_flag = (hours - hours.shift(n_steps) > 1).values

    # get positions in data, w.r.t. n_step removed observations at start
    flagged_idx = np.where(date_flag)[0] - n_steps

    return flagged_idx

In [32]:
def seq_maker(data, targets, temps, n_steps):
    """Build sliding-window sequences and drop those spanning a time gap.

    Sequence ``i`` consists of rows ``i .. i + n_steps - 1`` of ``data``; its
    target is row ``i + n_steps`` of ``targets`` and its auxiliary input is row
    ``i`` of ``temps``. Sequences flagged by ``date_flagger`` are removed from
    all three outputs.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature frame with more than ``n_steps`` rows.
    targets : pandas.DataFrame
        Target frame with the same number of rows as ``data``.
    temps : pandas.DataFrame
        Auxiliary frame with ``len(data) - n_steps`` rows, already aligned
        with the sequences.
    n_steps : int
        Number of rows per sequence.

    Returns
    -------
    tuple of numpy.ndarray
        ``(sequences, targets, temps)`` with shapes
        ``(n_kept, n_steps, n_features)``, ``(n_kept, n_targets)`` and
        ``(n_kept, n_temp_columns)``.

    Raises
    ------
    ValueError
        If ``data`` has too few rows to form a single sequence.
    """
    vals = data.values
    n_seq = len(vals) - n_steps
    if n_seq <= 0:
        raise ValueError(
            'need more than n_steps={} rows to build sequences, got {}'.format(n_steps, len(vals))
        )
    sequences = np.stack([vals[i:i + n_steps] for i in range(n_seq)])

    # Vectorized membership test: True for every sequence position that was
    # NOT flagged (replaces a per-element `idx not in flags` scan).
    flags = date_flagger(data, n_steps)
    mask = np.isin(np.arange(n_seq), flags, invert=True)

    sequences = sequences[mask]
    targets = targets[n_steps:][mask].values
    temps = temps[mask].values # TEMPORARY

    return sequences, targets, temps

In [33]:
# make sequences
# NOTE: targets_* and temps_* are rebound from DataFrames to NumPy arrays here
# (seq_maker returns .values), so this cell cannot be re-run without first
# re-running the cells that create the DataFrames.
sequences_train, targets_train, temps_train = seq_maker(data_train, targets_train, temps_train, n_steps)
sequences_test, targets_test, temps_test = seq_maker(data_test, targets_test, temps_test, n_steps)

In [34]:
# Copy of the test sequences in which the most recent observations are hidden:
# the last (mask_len - 1) time steps of every sequence are overwritten with
# the values of time step -t_steps.
sequences_masked = sequences_test.copy()

mask_len = t_steps
# t runs over 1 .. mask_len - 1, i.e. positions -1 .. -(mask_len - 1)
for t in range(1, mask_len):
    sequences_masked[:, -t, :] = sequences_masked[:, -(t_steps), :]

In [35]:
# shuffle training data randomly
# A dedicated, seeded generator makes the permutation reproducible across
# kernel restarts without touching NumPy's global random state.
shuffle_rng = np.random.default_rng(42)
idxs = shuffle_rng.permutation(len(targets_train))

# apply the same permutation to all three arrays so rows stay aligned
sequences_train = sequences_train[idxs]
targets_train = targets_train[idxs]
temps_train = temps_train[idxs]

## Train model

In [44]:
# Base name of the model: used to locate the saved model that is loaded below
# and, with an '_X' suffix, to name the results and model written by this run.
model_name = 'gh_lstm_w128_v3'

In [45]:
# reset Keras' global state before (re)building the model
tf.keras.backend.clear_session()

In [46]:
# load the previously saved model; its weights are copied into the freshly
# built model in a later cell
model_load = tf.keras.models.load_model(save_path + model_name)

In [47]:
# Sequence branch: input shape is (time steps, features), taken from the
# training sequences.
inputs = layers.Input(shape=(sequences_train.shape[1], sequences_train.shape[2]))
# NOTE(review): `inputs` is rebound to the BatchNormalization output here, so
# both Model(...) calls below receive this tensor instead of the original
# Input. The model summary printed in this notebook shows the LSTM fed directly
# by an InputLayer, which suggests this BatchNormalization is not part of the
# built graph -- confirm whether that is intended. Changing it would alter the
# layer list and therefore the positional weight copy from the loaded model.
inputs = layers.BatchNormalization()(inputs)

# With return_state=True the LSTM returns (output, hidden state, cell state);
# only the final hidden state is kept as the encoding of the sequence.
_, encoder, _ = layers.LSTM(units=128, recurrent_dropout=0.2, return_sequences=False, return_state=True)(inputs)
encoder = layers.BatchNormalization()(encoder)
encoder = layers.Dropout(0.2)(encoder)

# Auxiliary branch: two values per sample (fed with temps_train in fit()).
# Gaussian noise with stddev 1.0 is added as regularization; the inputs are
# standardized, so the noise has the same scale as the signal.
temp_input = layers.Input(shape=(2,))
temp = layers.GaussianNoise(stddev=1.0)(temp_input)
temp = layers.BatchNormalization()(temp)

# Head: concatenate both branches and map them linearly (no activation, no
# bias) onto the two targets.
output = layers.Concatenate()([encoder, temp])
output = layers.Dense(units=2, activation=None, use_bias=False)(output)

# `encoded` exposes the sequence encoder alone; `model` is the full predictor.
encoded = Model(inputs, encoder)
model = Model([inputs, temp_input], output)

In [48]:
# Adam optimizer with a fixed learning rate; mean squared error over the two
# (standardized) target columns.
lr = 0.001
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr), loss='mse')
# alternative optimizer (disabled):
# model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=lr), loss='mse')
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 12, 14)]     0           []                               
                                                                                                  
 lstm (LSTM)                    [(None, 128),        73216       ['input_4[0][0]']                
                                 (None, 128),                                                     
                                 (None, 128)]                                                     
                                                                                                  
 input_2 (InputLayer)           [(None, 2)]          0           []                               
                                                                                            

In [49]:
# Copy the pre-trained weights into the freshly built model, layer by layer.
# Layers are paired purely by position, so both models must have the same
# number of layers in the same order. zip() would silently stop at the shorter
# list and leave the remaining layers randomly initialized, hence the check.
if len(model.layers) != len(model_load.layers):
    raise ValueError(
        'Layer count mismatch: model has {} layers, loaded model has {}'.format(
            len(model.layers), len(model_load.layers)
        )
    )
for layer, layer_loaded in zip(model.layers, model_load.layers):
    layer.set_weights(layer_loaded.get_weights())

In [50]:
# # copy weights from pre-trained
# for idx, layer in enumerate(model.layers):
#     layer.set_weights(model2.layers[idx].get_weights())

In [51]:
# train model
# Inputs are the sequences plus the auxiliary two-value input; targets are the
# two standardized target columns.
# NOTE: validation_split=0.0 means no validation data is held out, so no
# 'val_loss' is produced. The disabled EarlyStopping callback below monitors
# 'val_loss' and would need a non-zero validation split to work.
trainHist = model.fit(
    [sequences_train, temps_train],
    targets_train,
    epochs=10,
    batch_size=128,
    validation_split=0.0,
    callbacks=[
#         tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, mode="min", restore_best_weights=False),
#         tf.keras.callbacks.LearningRateScheduler(lambda epoch: lr * (0.9 ** epoch))
    ],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [52]:
# save training results
# NOTE(review): this pickles the whole History object returned by fit(), not
# only its metrics; the output of this cell shows Keras model weights being
# written as part of the dump. Dumping `trainHist.history` (a plain dict) would
# presumably be smaller and less tied to the Keras version -- confirm what the
# code that reads this file expects before changing it.
with open(results_path + '{}_X.pickle'.format(model_name), 'wb') as fo:
    pickle.dump(trainHist, fo)

Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers\batch_normalization
......vars
.........0
.........1
.........2
.........3
...layers\batch_normalization_1
......vars
.........0
.........1
.........2
.........3
...layers\concatenate
......vars
...layers\dense
......vars
.........0
...layers\dropout
......vars
...layers\gaussian_noise
......vars
...layers\input_layer
......vars
...layers\input_layer_1
......vars
...layers\lstm
......vars
...layers\lstm\cell
......vars
.........0
.........1
.........2
...metrics\mean
......vars
.........0
.........1
...optimizer
......vars
.........0
.........1
.........10
.........11
.........12
.........13
.........14
.........15
.........16
.........2
.........3
.........4
.........5
.........6
.........7
.........8
.........9
...vars
Keras model archive saving:
File Name                                             Modified             Size
config.json                                    2023-09-06 20:48:07         4645
metad

In [53]:
# save model
# The '_X' suffix writes to a different location than the base model loaded
# from save_path + model_name, so the loaded model is not overwritten.
model.save(save_path + model_name + '_X')
# saving the encoder-only model is currently disabled:
#encoded.save(save_path + model_name + '_encoder')



INFO:tensorflow:Assets written to: C:\ml_projects\rise-gh.git\rise-gh\model\saved\gh_lstm_w128_v3_X\assets


INFO:tensorflow:Assets written to: C:\ml_projects\rise-gh.git\rise-gh\model\saved\gh_lstm_w128_v3_X\assets
