### Libraries, paths, and set-up

In [None]:
import pandas as pd
import numpy as np
import os
import pickle
import tensorflow as tf
import keras
import sys
from tensorflow.keras.layers import Layer, Dense, LSTM, GRU, Dropout, Input, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.callbacks import EarlyStopping
import keras_tuner as kt
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler

# Seed both TensorFlow's and NumPy's global random generators with one shared value
SEED = 42
for seed_fn in (tf.random.set_seed, np.random.seed):
    seed_fn(SEED)

# Switch the working directory to the project checkout; the relative
# 'data/processed/...' paths used later are resolved from here.
os.chdir('/Users/manotas/Documents/GitHub-Repos/ML-Energy-Colombia')

# Custom Modules
from src.models.metrics import calculate_metrics


In [None]:
# Load the windowed data: each pickle holds an (X_windows, Y_windows) pair.
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# files produced by this project's own preprocessing step.
with open('data/processed/train_non_overlapping_windows.pkl', 'rb') as f:
    X_train_windows, Y_train_windows = pickle.load(f)
with open('data/processed/test_non_overlapping_windows.pkl', 'rb') as f:
    X_test_windows, Y_test_windows = pickle.load(f)

# Store references for plant and agent names (first row of every window),
# captured before those columns are dropped below.
train_references = [(window['plant'].iloc[0], window['agent'].iloc[0]) for window in X_train_windows]
test_references = [(window['plant'].iloc[0], window['agent'].iloc[0]) for window in X_test_windows]

# Columns that identify a window but are not model features
NON_FEATURE_COLUMNS = ['datetime', 'plant', 'agent']


def _windows_to_float_array(windows):
    """Drop the non-feature columns from each window and stack them as floats.

    The columns are dropped in place (as before), so the window DataFrames
    keep only feature columns afterwards. The float conversion is done while
    extracting the values: assigning ``window.astype(float)`` back through
    ``window.loc[:, :]`` writes in place and may leave object/int column
    dtypes untouched, which can yield an object-dtype array.

    Args:
        windows: iterable of pandas DataFrames, one per window.

    Returns:
        A float NumPy array with one entry per window.

    Raises:
        ValueError: if a remaining column cannot be converted to float.
    """
    arrays = []
    for window in windows:
        window.drop(columns=NON_FEATURE_COLUMNS, errors='ignore', inplace=True)
        arrays.append(window.to_numpy(dtype=float))
    return np.array(arrays)


# Convert to numpy arrays
X_train_np = _windows_to_float_array(X_train_windows)
Y_train_np = np.array(list(Y_train_windows))
X_test_np = _windows_to_float_array(X_test_windows)
Y_test_np = np.array(list(Y_test_windows))


### Ensuring GPU availability

In [None]:
# Print the versions of the main libraries and of the interpreter; the empty
# string produces the same blank separator line as a bare print().
for report_line in (
    f"Tensor Flow Version: {tf.__version__}",
    f"Keras Version: {keras.__version__}",
    "",
    f"Python {sys.version}",
    f"Pandas {pd.__version__}",
):
    print(report_line)
# Scikit-Learn / SciPy versions are not reported: no `sk` / `sp` aliases are
# imported in this notebook.

# True when TensorFlow lists at least one physical GPU device
gpu = bool(tf.config.list_physical_devices('GPU'))
print()
gpu_status = "AVAILABLE" if gpu else "NOT AVAILABLE"
print("GPU is", gpu_status)

### Time2Vec (T2V) layer

In [None]:
from tensorflow.keras.layers import Layer
import tensorflow as tf

class Time2Vec(Layer):
    """Time2Vec embedding layer.

    For an input whose last axis has ``F`` features, the layer concatenates
    along the last axis:

    * a periodic term ``sin(inputs @ W + B)`` with ``kernel_size`` channels, and
    * a linear term ``w * inputs + b`` with ``F`` channels,

    so the output's last axis has ``kernel_size + F`` channels.

    Args:
        kernel_size: number of periodic (sine) channels to produce.
        **kwargs: forwarded to ``Layer.__init__`` (e.g. ``name``).
    """

    def __init__(self, kernel_size=1, **kwargs):
        # Initialise the base Layer first, then attach our own attributes.
        super().__init__(**kwargs)
        self.kernel_size = kernel_size

    def build(self, input_shape):
        """Create the trainable weights once the feature dimension is known."""
        # Periodic term: projection matrix and its offset.
        self.W = self.add_weight(name='W',
                                 shape=(input_shape[-1], self.kernel_size),
                                 initializer='uniform',
                                 trainable=True)
        self.B = self.add_weight(name='B',
                                 shape=(1, self.kernel_size),
                                 initializer='uniform',
                                 trainable=True)
        # Linear term: per-feature scale and offset.
        self.w = self.add_weight(name='w',
                                 shape=(input_shape[-1],),
                                 initializer='uniform',
                                 trainable=True)
        self.b = self.add_weight(name='b',
                                 shape=(input_shape[-1],),
                                 initializer='uniform',
                                 trainable=True)
        super().build(input_shape)

    def call(self, inputs):
        """Return ``concat([sin(inputs @ W + B), w * inputs + b], axis=-1)``."""
        linear = self.w * inputs + self.b
        periodic = tf.math.sin(tf.matmul(inputs, self.W) + self.B)
        return tf.concat([periodic, linear], -1)

    def compute_output_shape(self, input_shape):
        """Return the input shape with the last axis grown by ``kernel_size``.

        The previous implementation reported ``2 * input_shape[2]`` for the
        last axis, which only matches ``call`` when ``kernel_size`` equals the
        number of input features.
        """
        dims = tuple(input_shape)
        features = dims[-1]
        # An unknown feature dimension stays unknown in the output.
        out_features = None if features is None else features + self.kernel_size
        return dims[:-1] + (out_features,)

    def get_config(self):
        """Include ``kernel_size`` so the layer can be re-created from its config."""
        config = super().get_config()
        config.update({'kernel_size': self.kernel_size})
        return config

### A model with hyperparameter options for tuning

In [None]:
def build_model(hp):
    """Build and compile one candidate Time2Vec + RNN model for the tuner.

    Hyperparameters read from ``hp``:
        t2v_kernel_size: Time2Vec kernel size, 1 to 5.
        num_layers: number of stacked recurrent layers, 1 to 3.
        rnn_type: 'LSTM' or 'GRU', shared by every recurrent layer.
        units: units per recurrent layer, 32 to 128 in steps of 32 (shared).
        dropout: whether a Dropout(0.2) follows each recurrent layer.
        learning_rate: Adam learning rate, one of 1e-2, 1e-3, 1e-4.

    Args:
        hp: KerasTuner hyperparameter container for the current trial.

    Returns:
        A compiled Keras ``Model`` (loss 'mse', metric 'mae').
    """
    # Windows are taken to be 24 steps long; the feature count comes from X_train_np.
    n_features = X_train_np.shape[2]
    inputs = Input(shape=(24, n_features))

    kernel_size = hp.Int('t2v_kernel_size', min_value=1, max_value=5)
    x = Time2Vec(kernel_size=kernel_size)(inputs)

    # Read every hyperparameter once, in the order they were first requested.
    num_layers = hp.Int('num_layers', 1, 3)
    rnn_type = hp.Choice('rnn_type', ['LSTM', 'GRU'])
    units = hp.Int('units', min_value=32, max_value=128, step=32)
    use_dropout = hp.Boolean('dropout')

    rnn_cls = LSTM if rnn_type == 'LSTM' else GRU
    last_index = num_layers - 1
    for depth in range(num_layers):
        # Only the final recurrent layer collapses the time axis.
        x = rnn_cls(units=units, return_sequences=(depth != last_index))(x)
        if use_dropout:
            x = Dropout(rate=0.2)(x)

    outputs = Dense(1)(x)

    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    model = Model(inputs, outputs)
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='mse',
        metrics=['mae'],
    )
    return model


In [None]:
# Hyperband tuner over build_model, ranking trials by validation MAE;
# trial state is stored under hyperband/time2vec_rnn_non_overlapping.
tuner = kt.Hyperband(
    build_model,
    objective='val_mae',
    max_epochs=50,
    directory='hyperband',
    project_name='time2vec_rnn_non_overlapping',
)

# Stop a trial once validation loss has not improved for 5 consecutive epochs
stop_early = EarlyStopping(monitor='val_loss', patience=5)

# Run the search; the test windows serve as the validation set here
validation_pair = (X_test_np, Y_test_np)
tuner.search(
    X_train_np,
    Y_train_np,
    epochs=50,
    validation_data=validation_pair,
    callbacks=[stop_early],
)

# Keep the single best model found by the search
best_models = tuner.get_best_models(num_models=1)
best_model = best_models[0]


In [None]:
# Fetch the top-ranked model from the tuner and write it to disk
nw_model_path = '/Users/manotas/Desktop/models/hyperbandit/nw_model'
nw_model = tuner.get_best_models(num_models=1)[0]
nw_model.save(nw_model_path)

### Make predictions and calculate metrics

In [None]:
# Predict on the held-out windows. `val_data` is not defined anywhere in this
# notebook; X_test_np is the array that was used as validation data during the
# hyperparameter search.
ypred_nw = nw_model.predict(X_test_np)

# Persist the raw predictions without index or header
y_pred_nw = pd.DataFrame(ypred_nw)
y_pred_nw.to_csv('/Users/manotas/Desktop/models/hyperbandit/ypred_nw.csv', index=False, header=False)

In [None]:
# Score the predictions against the held-out targets. `val_targets` is not
# defined in this notebook and `ypred_nw` is a NumPy array (it cannot be
# indexed by the string 'ypred_nw'), so pass the test targets and the
# prediction array directly.
# NOTE(review): assumes calculate_metrics takes (y_true, y_pred) array-likes of
# matching shape — confirm against src.models.metrics.
calculate_metrics(Y_test_np, ypred_nw)