# Greenhouse Model

## Import dependencies and data

In [1]:
# import dependencies

import time
import os
import pickle
import json
from collections import deque
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from IPython.display import clear_output

In [2]:
# import prop. funcs and models

from data_funcs import k_fold_data_validation
from train_funcs import train_network, test_autoreg
from networks import feedForwardNeuralNetwork, recurrentNeuralNetwork
from opts import AdaGrad, RMSProp, Adam

In [3]:
# import external models

from statsmodels.tsa.arima.model import ARIMA
from sklearn.ensemble import GradientBoostingRegressor
# from xgboost import XGBRegressor

## Create data for specific fold

In [4]:
# define relevant paths
# The project root is taken to be the parent of the current working directory,
# so the notebook has to be started from a direct sub-folder of the project.
home_path = os.path.dirname(os.getcwd())

# Build the directories with os.path.join instead of hard-coded '\\' separators
# so they resolve on any operating system. The final '' component keeps a
# trailing separator, because other cells append file and folder names to
# these variables by plain string concatenation.
data_path = os.path.join(home_path, 'data', '')
results_path = os.path.join(home_path, 'nn_models', 'results', '')

# get merged data
# The CSV is read with a two-row column header and five leading index columns,
# which yields two-level (MultiIndex) columns and a five-level row index.
data = pd.read_csv(
    os.path.join(data_path, 'data_processed.csv'),
    header=[0, 1],
    index_col=[0, 1, 2, 3, 4]
)

In [5]:
# list the sensor columns available in the 'humidity' column group
data['humidity'].columns

Index(['TA01_GT10X_GM10X', 'DC_GT101_GM101', 'DC_GT102_GM102',
       'DC_GT103_GM103', 'DC_GT104_GM104', 'DC_GT401_GM401',
       'TA01_GT401_GM401', 'TA02_GT401_GM401', 'outdoor', 'DC_GT101_GM101_abs',
       'DC_GT102_GM102_abs', 'DC_GT103_GM103_abs', 'DC_GT104_GM104_abs',
       'TA01_GT10X_GM10X_abs', 'TA01_GT401_GM401_abs', 'TA02_GT401_GM401_abs',
       'outdoor_abs', 'TA01_GT401_GM401_rel', 'TA02_GT401_GM401_rel',
       'outdoor_rel', 'TA01_GT401_GM401_scaled', 'TA02_GT401_GM401_scaled',
       'outdoor_scaled', 'TA_inflow', 'TA_inflow_out'],
      dtype='object', name='sensor_ID')

In [6]:
# Data specs. All four values are forwarded to k_fold_data_validation in the
# cross-validation loop; t_steps is additionally handed to test_autoreg.
k_frac, m = 0.05, 5
t_steps, n_steps = 1, 6

# Columns to use, written as (column group, [sensor IDs]) in the order they
# should appear, then flattened into (group, sensor_ID) tuples.
col_groups = [
    ('temperatures', ['TA01_GT10X_GM10X', 'TA01_GT401_GM401', 'DC_GT301_damped']),
    ('sun', ['gsi']),
    ('power', ['phase']),
    ('time', ['minofday']),
    ('humidity', ['TA01_GT10X_GM10X_abs', 'TA01_GT401_GM401_abs', 'outdoor_abs']),
    ('temperatures', ['TA01_GT401_GM401_scaled', 'DC_GT301_damped_scaled']),
    # currently excluded:
    # ('state', ['TA01_output', 'TA02_output']),
]
cols = [(group, sensor) for group, sensors in col_groups for sensor in sensors]

## K-fold CV - Neural Nets

In [7]:
# version tag embedded in the saved result file names
# ('FFNN_v{}_k{}' / 'RNN_v{}_k{}' are formatted with version and k_idx)
version = 11

In [8]:
# --- training hyperparameters ---
# k1, k2 and seed are given to both network constructors; seed is also
# passed to train_network.
k1, k2 = 10, 1
seed = 1

# lambd and sigma are passed straight through to train_network
lambd, sigma = 0.01, 1.0

# --- remaining training settings (also consumed by train_network) ---
n_epochs, n_batch = 50, 128
lr = 0.001

In [9]:
# K-fold cross-validation of the two neural networks.
#
# For every fold the data is split with k_fold_data_validation, then each
# network is built, trained with Adam, evaluated autoregressively on the test
# split and its results dict is pickled to
# <results_path>/<sub-folder>/<model_name>.pickle.
#
# NOTE(review): only folds 11-19 are run here, while the disabled baseline cell
# further down iterates over range(20); presumably folds 0-10 were produced in
# an earlier session - confirm before relying on a full re-run.
for k_idx in range(11, 20):
    train_tup, test_tup, val_tup, col_params = k_fold_data_validation(
        data=data.copy(),  # pass a copy so the loaded frame is never modified
        k_idx=k_idx,
        k_frac=k_frac,
        m=m,
        cols=cols,
        t_steps=t_steps,
        n_steps=n_steps,
        setpoint=True,
        shuffle=False
    )

    # extract tuples
    (sequences_train, targets_train, temps_train, temps_t_train, _) = train_tup
    (sequences_test, targets_test, temps_test, temps_t_test, sequences_masked, _) = test_tup
    (sequences_val, targets_val, temps_val, temps_t_val, _) = val_tup

    # The two networks go through exactly the same pipeline and differ only in
    # the entries below:
    #   (file-name prefix, results sub-folder, network class, hidden units,
    #    input selector)
    # The feed-forward net is fed one time step per sequence (index -2 along
    # axis 1), the recurrent net receives the full sequences. The table is
    # rebuilt for every fold so each model gets a fresh `units` object.
    model_specs = [
        ('FFNN', 'ffnn', feedForwardNeuralNetwork, [32, 32, 32], lambda seqs: seqs[:, -2, :]),
        ('RNN', 'rnn', recurrentNeuralNetwork, 32, lambda seqs: seqs),
    ]

    for prefix, subdir, network_cls, units, select_inputs in model_specs:
        # define network
        model_name = '{}_v{}_k{}'.format(prefix, version, k_idx)
        model = network_cls(
            k1=k1,
            k2=k2,
            m=units,
            seed=seed
        )

        # define optimizer
        adam = Adam(
            beta1=0.9,
            beta2=0.999,
            eps=1e-8,
            weights=model.weights,
        )

        # set optimizer
        model.optimizer = adam

        # train model
        results = train_network(
            model=model,
            train_data=(select_inputs(sequences_train), temps_t_train, targets_train),
            val_data=(select_inputs(sequences_val), temps_t_val, targets_val),
            seed=seed,
            n_epochs=n_epochs,
            n_batch=n_batch,
            lambd=lambd,
            sigma=sigma,
            lr=lr,
            optimizer='adam'
        )

        # get autoregressive test predictions
        test_preds, test_encodings = test_autoreg(
            model,
            select_inputs(sequences_masked),
            temps_t_test,
            targets_test,
            t_steps
        )

        results['test_preds'] = test_preds
        results['test_encodings'] = test_encodings

        # save results
        # os.path.join avoids hard-coded '\\' separators, and the folder is
        # created on demand so a missing directory cannot crash the run after
        # the model has already been trained.
        save_dir = os.path.join(results_path, subdir)
        os.makedirs(save_dir, exist_ok=True)
        save_path = os.path.join(save_dir, model_name + '.pickle')
        with open(save_path, 'wb') as fo:
            pickle.dump(results, fo)

        # clear output
        clear_output()

## K-fold CV - other models (ARIMA and GBDT baselines; the cell below is currently commented out)

In [10]:
# for k_idx in range(20):
#     train_tup, test_tup, val_tup, col_params = k_fold_data_validation(
#         data=data.copy(), 
#         k_idx=k_idx, 
#         k_frac=k_frac, 
#         m=m, 
#         cols=cols, 
#         t_steps=t_steps, 
#         n_steps=n_steps,
#         setpoint=True,
#         shuffle=False
#     )
    
#     # extract tuples
#     (sequences_train, targets_train, temps_train, temps_t_train, _) = train_tup
#     (sequences_test, targets_test, temps_test, temps_t_test, sequences_masked, _) = test_tup
#     (sequences_val, targets_val, temps_val, temps_t_val, _) = val_tup

#     ### TRAIN ARIMA
#     clear_output()
#     print('TRAINING ARIMA, k_idx: {}'.format(k_idx))
    
#     # get model name
#     model_name = 'arima_v{}_k{}'.format(version, k_idx)
    
#     # get endogenous and exogenous regressors
#     endog_train = temps_t_train.tolist()
#     exog_train = [np.array(seq) for seq in sequences_train[:, -2, :].tolist()]
# #     exog_train = [np.array(seq).mean(axis=0) for seq in sequences_train[:, -t_steps:, :].tolist()]

#     endog_test = temps_t_test.tolist()
#     exog_test = [np.array(seq) for seq in sequences_masked[:, -2, :].tolist()]
# #     exog_test = [np.array(seq).mean(axis=0) for seq in sequences_test[:, -t_steps:, :].tolist()]
    
#     # estimate model
#     start = time.time()
#     arima_temp = ARIMA(endog=endog_train, exog=exog_train, order=(n_steps, 1, 0)).fit()
#     train_time = time.time() - start
    
#     # get predictions (t steps)
#     print('PREDICTING w. ARIMA, k_idx: {}'.format(k_idx))
#     arima_preds = []
#     for exog in exog_test:
#         pred = arima_temp.forecast(steps=1, exog=exog)
#         arima_preds.append(pred)

#     save_path = results_path + 'arima\\' + model_name
#     #arima_temp.save(save_path + '_model')
    
#     results = {
#         'test_preds':arima_preds,
#         'train_time':train_time
#     }
#     with open(save_path + '.pickle', 'wb') as fo:
#         pickle.dump(results, fo)
        
#     ### TRAIN GBDT
#     clear_output()
#     print('TRAINING GBDT, k_idx: {}'.format(k_idx))
    
#     # get model name
#     model_name = 'gbdt_v{}_k{}'.format(version, k_idx)
#     gbdt = GradientBoostingRegressor(
#         loss='squared_error',
#         learning_rate=0.001,
#         n_estimators=2000,
#         max_depth=10,
#         max_leaf_nodes=None,
#         #subsample=0.7,
#         verbose=1,
# #         n_iter_no_change=10,
# #         tol=1e-4,
# #         validation_fraction=0.1
#     )
    
#     # train GBDT
#     start = time.time()
#     X_train = np.hstack((np.stack(exog_train), temps_t_train))
#     gbdt.fit(X_train, targets_train.flatten())
#     train_time = time.time() - start
    
#     # create queue for preds
#     pred_queue = deque(maxlen=t_steps)
#     for temp in temps_t_test[:t_steps]:
#         pred_queue.append(temp)
    
#     # iterate over test seqs and get preds
#     gbdt_preds = []
#     X_test = [np.array(seq) for seq in sequences_masked[:, -2, :].tolist()]
#     X_test = np.hstack((np.stack(X_test), temps_t_test))

#     for x in X_test:
#         temp = np.array([pred_queue.popleft()])
#         x[-1] = temp[0][0]
#         temp_pred = gbdt.predict(x[np.newaxis, :])
#         pred_queue.append(temp_pred)
#         gbdt_preds.append(temp_pred)
        
#     # save model
#     save_path = results_path + 'gbdt\\' + model_name
# #     with open(save_path + '_model.pickle'.format(gbdt_name), 'wb') as fo:
# #         pickle.dump(gbdt, fo)
        
#     # save model results
#     results = {
#         'test_preds':gbdt_preds,
#         'train_loss':gbdt.train_score_,
#         'train_time':train_time,
#         'model_params':gbdt.get_params(),
#     }
#     with open(save_path + '.pickle', 'wb') as fo:
#         pickle.dump(results, fo)