### Libraries, paths, and set-up

In [1]:
# Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio
import datetime as dt
import os
os.chdir('..')
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from requests.exceptions import RequestException
import joblib
import pickle
from src.models.metrics import *
from src.utils.utils import *
from src.data.window import *
import sys
import tensorflow.keras
import sklearn as sk
import scipy as sp
import platform

#Keras
import tensorflow as tf
from tensorflow.keras.layers import Layer, Dense, LSTM, GRU, Dropout, Reshape, Input, Concatenate, Flatten
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from tensorflow.keras.models import load_model
import keras_tuner

import warnings
warnings.filterwarnings('ignore')

import sys
import tensorflow.keras
import sklearn as sk
import scipy as sp
import platform

#from google.colab import drive
#drive.mount('/content/drive')
#os.chdir('/content/drive/MyDrive/Colab_Notebooks')

# Loading the data
train_df = pd.read_csv('models/data/train_df.csv')
test_df = pd.read_csv('models/data/test_df.csv')

train_df.set_index(['datetime', 'plant'], inplace=True)
test_df.set_index(['datetime', 'plant'], inplace=True)

In [None]:
# Loading the data
train_df = pd.read_csv('train_df.csv')
test_df = pd.read_csv('test_df.csv')

train_df.set_index(['datetime', 'plant'], inplace=True)
test_df.set_index(['datetime', 'plant'], inplace=True)

### Ensuring GPU availability

In [None]:
print(f"Python Platform: {platform.platform()}")
print(f"Tensor Flow Version: {tf.__version__}")
print(f"Keras Version: {tensorflow.keras.__version__}")
print()
print(f"Python {sys.version}")
print(f"Pandas {pd.__version__}")
print(f"Scikit-Learn {sk.__version__}")
print(f"SciPy {sp.__version__}")
gpu = len(tf.config.list_physical_devices('GPU'))>0
print()
print("GPU is", "available" if gpu else "NOT AVAILABLE")

Python Platform: Linux-5.15.107+-x86_64-with-glibc2.31
Tensor Flow Version: 2.12.0
Keras Version: 2.12.0

Python 3.10.12 (main, Jun  7 2023, 12:45:35) [GCC 9.4.0]
Pandas 1.5.3
Scikit-Learn 1.2.2
SciPy 1.10.1

GPU is available


In [None]:
# Get the current column list
cols = list(train_df.columns)

# Rearrange 'daily_ask' to the end
cols = [col for col in cols if col != 'daily_ask'] + ['daily_ask']

# Reindex the dataframe
train_df = train_df.reindex(columns=cols)
test_df = test_df.reindex(columns=cols)

In [None]:
# Window the data
train_ow = create_windows(train_df, window_size=5)
test_ow = create_windows(test_df, window_size=5)


with open('/Users/manotas/Desktop/models/train_ow.pkl','wb') as f:
  pickle.dump(train_ow, f)

with open('/Users/manotas/Desktop/models/test_nw.pkl','wb') as f:
  pickle.dump(test_ow, f)

### T2V Layer

In [None]:
class T2V(Layer):
    def __init__(self, output_dim=None, **kwargs):
        self.output_dim = output_dim
        super(T2V, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W = self.add_weight(name='W',
                                 shape=(input_shape[-1], self.output_dim),
                                 initializer='uniform',
                                 trainable=True)
        self.B = self.add_weight(name='B',
                                 shape=(input_shape[-1], self.output_dim),
                                 initializer='uniform',
                                 trainable=True)
        self.P = self.add_weight(name='P',
                                 shape=(input_shape[-1], self.output_dim),
                                 initializer='uniform',
                                 trainable=True)
        super(T2V, self).build(input_shape)

    def call(self, x):
        original = self.W * K.expand_dims(x, -1) + self.B
        sin_trans = K.sin(original)
        return sin_trans + self.P

### A model with hyperparameter options for tuning

In [None]:
def build_model(hp):
    inputs = Input(shape=(6, train_w[0].shape[1]-1))  # Updated the shape
    t2v = T2V(32)(inputs[:, :, -1])  # fixed dimension for time2vec
    x = Concatenate([inputs[:, :, :-1], t2v])

    # Only 1 or 2 RNN layers
    for i in range(hp.Int('num_rnn_layers', 1, 2)):
        if hp.Choice('rnn_type', ['gru', 'lstm']) == 'gru':
            x = GRU(hp.Int('rnn_units', 64, 128, step=32),
                    return_sequences=True if i != hp.Int('num_rnn_layers', 1, 2) - 1 else False,
                    activation='tanh')(x)  # Only tanh activation for GRUs
        else:
            x = LSTM(hp.Int('rnn_units', 64, 128, step=32),
                     return_sequences=True if i != hp.Int('num_rnn_layers', 1, 2) - 1 else False,
                     activation='tanh')(x)  # Only tanh activation for LSTMs

    # Only 1 Dense layer
    x = Dense(hp.Int('dense_units', 24, 32, step=8),
              activation='relu')(x)  # Only relu activation for dense layer

    outputs = Dense(1)(x)

    model = Model(inputs, outputs)

    model.compile(optimizer='adam',  # Only Adam optimizer
                  loss='mse',
                  metrics=['mae'])

    return model


In [None]:
# Convert windows to NumPy arrays for Keras model
train_data = np.array([window.iloc[:, :-1].values for window in train_ow])
train_targets = np.array([window.iloc[:, -1].values for window in train_ow])
val_data = np.array([window.iloc[:, :-1].values for window in test_ow])
val_targets = np.array([window.iloc[-1, -1] for window in test_ow])

# Verify dimensions for model
print(train_data.shape)
print(train_targets.shape)
print(val_data.shape)
print(val_targets.shape)
print(train_ow[0].shape)
print(test_ow[0].shape)

(3230016, 6, 12)
(3230016, 6)
(323604, 6, 12)
(323604,)
(6, 13)
(6, 13)


In [None]:
ytrue_ow = pd.DataFrame(val_targets)
ytrue_ow.to_csv('ytrue_ow.csv', index=False, header=False)

In [None]:
# Set hyperbandit tuning
with tf.device("/cpu:0"):
    tuner = keras_tuner.Hyperband(
        build_model,
        objective='val_mae',
        max_epochs=5,
        directory='/Users/manotas/Desktop/models/hyperbandit/',
        project_name='rolling')

    tuner.search_space_summary()

# Train the model
tuner.search(train_data, train_targets, epochs=5, validation_data=(val_data, val_targets))

In [None]:
# Save the model
ow_model = tuner.get_best_models()[0]
ow_model.save('/Users/manotas/Desktop/models/hyperbandit/ow_model')

In [None]:
# Load the model
ow_model = load_model('/Users/manotas/Desktop/models/hyperbandit/ow_model')

### Make predictions and calculate metrics

In [None]:
ypred_ow = ow_model.predict(val_data)
y_pred_ow = pd.DataFrame(ypred_ow)
y_pred_ow.to_csv('/Users/manotas/Desktop/models/ypred_ow.csv', index=False, header=False)



In [5]:
calculate_metrics(val_targets, ypred_ow['ypred_ow'])

RMSE: 0.1327215141078611
MAE: 0.0854192860661992
sMAPE(0-200): 11.82219866414506%
R-squared: 0.9316543260943337
