### Libraries, paths, and set-up

In [1]:
# Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio
import datetime as dt
import os
os.chdir('..')
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from requests.exceptions import RequestException
import joblib
import pickle
from src.models.metrics import *
from src.utils.utils import *
from src.data.window import *
import sys
import tensorflow.keras
import sklearn as sk
import scipy as sp
import platform

#Keras
import tensorflow as tf
from tensorflow.keras.layers import Layer, Dense, LSTM, GRU, Dropout, Reshape, Input, Concatenate, Flatten
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from tensorflow.keras.models import load_model
import keras_tuner

import warnings
warnings.filterwarnings('ignore')

import sys
import tensorflow.keras
import sklearn as sk
import scipy as sp
import platform

#from google.colab import drive
#drive.mount('/content/drive')
#os.chdir('/content/drive/MyDrive/Colab_Notebooks')

# Loading the data
train_df = pd.read_csv('models/data/train_df.csv')
test_df = pd.read_csv('models/data/test_df.csv')

train_df.set_index(['datetime', 'plant'], inplace=True)
test_df.set_index(['datetime', 'plant'], inplace=True)

### Ensuring GPU availability

In [None]:
print(f"Python Platform: {platform.platform()}")
print(f"Tensor Flow Version: {tf.__version__}")
print(f"Keras Version: {tensorflow.keras.__version__}")
print()
print(f"Python {sys.version}")
print(f"Pandas {pd.__version__}")
print(f"Scikit-Learn {sk.__version__}")
print(f"SciPy {sp.__version__}")
gpu = len(tf.config.list_physical_devices('GPU'))>0
print()
print("GPU is", "available" if gpu else "NOT AVAILABLE")

Python Platform: Linux-5.15.107+-x86_64-with-glibc2.31
Tensor Flow Version: 2.12.0
Keras Version: 2.12.0

Python 3.10.12 (main, Jun  7 2023, 12:45:35) [GCC 9.4.0]
Pandas 1.5.3
Scikit-Learn 1.2.2
SciPy 1.11.1

GPU is NOT AVAILABLE


In [None]:
# Get the current column list
cols = list(train_df.columns)

# Rearrange 'daily_ask' to the end
cols = [col for col in cols if col != 'daily_ask'] + ['daily_ask']

# Reindex the dataframe
train_df = train_df.reindex(columns=cols)
test_df = test_df.reindex(columns=cols)

In [None]:
# Window the data
train_nw = create_windows_no_overlap(train_df, window_size=5)
test_nw = create_windows_no_overlap(test_df, window_size=5)

with open('/Users/manotas/Desktop/models/train_nw.pkl','wb') as f:
  pickle.dump(train_nw, f)

with open('/Users/manotas/Desktop/models/test_nw.pkl','wb') as f:
  pickle.dump(test_nw, f)

### T2V Layer

In [None]:
# Define T2V layer
class T2V(Layer):
    def __init__(self, output_dim=None, **kwargs):
        self.output_dim = output_dim
        super(T2V, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W = self.add_weight(name='W',
                                shape=(1, self.output_dim),
                                initializer='uniform',
                                trainable=True)
        self.B = self.add_weight(name='B',
                                shape=(1, self.output_dim),
                                initializer='uniform',
                                trainable=True)
        self.w = self.add_weight(name='w',
                                shape=(1, 1),
                                initializer='uniform',
                                trainable=True)
        self.b = self.add_weight(name='b',
                                shape=(1, 1),
                                initializer='uniform',
                                trainable=True)
        super(T2V, self).build(input_shape)

    def call(self, x):
      original = self.w * x + self.b
      sin_trans = K.sin(K.dot(K.expand_dims(x), self.W) + self.B)
      return K.concatenate([sin_trans, K.expand_dims(original, -1)], -1)



    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim + 1)

### A model with hyperparameter options for tuning

In [None]:
def build_model(hp):
    inputs = Input(shape=(train_data.shape[1:]))  # Using the shapes from the train_data
    t2v = T2V(32)(inputs[:, :, -1])  # fixed dimension for time2vec
    x = Concatenate([inputs[:, :, :-1], t2v])

    # Only 1 or 2 RNN layers
    for i in range(hp.Int('num_rnn_layers', 1, 2)):
        if hp.Choice('rnn_type', ['gru', 'lstm']) == 'gru':
            x = GRU(hp.Int('rnn_units', 64, 128, step=32),
                    return_sequences=True if i != hp.Int('num_rnn_layers', 1, 2) - 1 else False,
                    activation='tanh')(x)  # Only tanh activation for GRUs
        else:
            x = LSTM(hp.Int('rnn_units', 64, 128, step=32),
                     return_sequences=True if i != hp.Int('num_rnn_layers', 1, 2) - 1 else False,
                     activation='tanh')(x)  # Only tanh activation for LSTMs

    # Only 1 Dense layer
    x = Dense(hp.Int('dense_units', 24, 32, step=8),
              activation='relu')(x)  # Only relu activation for dense layer

    outputs = Dense(1)(x)

    model = Model(inputs, outputs)

    model.compile(optimizer='adam',  # Only Adam optimizer
                  loss='mse',
                  metrics=['mae'])

    return model

In [None]:
# Convert windows to NumPy arrays for Keras model
train_data = np.array([window.iloc[:, :-1].values for window in train_nw])
train_targets = np.array([window.iloc[:, -1].values for window in train_nw])
val_data = np.array([window.iloc[:, :-1].values for window in test_nw])
val_targets = np.array([window.iloc[-1, -1] for window in test_nw])

print(train_data.shape)
print(train_targets.shape)
print(val_data.shape)
print(val_targets.shape)
print(train_w[0].shape)
print(test_w[0].shape)

(646032, 5, 12)
(646032, 5)
(64750, 5, 12)
(64750,)
(5, 13)
(5, 13)


In [None]:
ytrue_nw = pd.DataFrame(val_targets)
ytrue_nw.to_csv('ytrue_nw.csv', index=False, header=False)

In [None]:
with tf.device("/cpu:0"):
    tuner = keras_tuner.Hyperband(
        build_model,
        objective='val_mae',
        max_epochs=5,
        directory='/Users/manotas/Desktop/models/hyperbandit',
        project_name='non-overlapping')

# Train the model
tuner.search(train_data, train_targets, epochs=5, validation_data=(val_data, val_targets))

In [None]:
# Save the model
nw_model = tuner.get_best_models()[0]
nw_model.save('/Users/manotas/Desktop/models/hyperbandit/nw_model')

In [None]:
# Load the model
nw_model = load_model('/Users/manotas/Desktop/models/hyperbandit/nw_model')

### Make predictions and calculate metrics

In [None]:
ypred_nw = nw_model.predict(val_data)
y_pred_nw = pd.DataFrame(ypred_nw)
y_pred_nw.to_csv('/Users/manotas/Desktop/models/hyperbandit/ypred_nw.csv', index=False, header=False)



In [8]:
calculate_metrics(val_targets, ypred_nw['ypred_nw'])

RMSE: 0.13608105605079473
MAE: 0.08653568232878347
sMAPE(0-200): 12.012033018952344%
R-squared: 0.9281460132540098
