In [3]:
from keras.layers import Dense, SimpleRNN, LSTM
from keras.models import Sequential
from pymongo import MongoClient
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
from sklearn.model_selection import train_test_split, GridSearchCV
from keras.wrappers.scikit_learn import KerasRegressor
from itertools import product
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import mlflow

class MongoDatabase:
    """Small wrapper around a MongoDB client that returns collections as DataFrames.

    The connection string is resolved in this order:

    1. the ``connection_string`` argument, when given;
    2. the environment variable named by ``CONNECTION_STRING_ENV_VAR``;
    3. ``DEFAULT_CONNECTION_STRING`` (the value this class always used).
    """

    # NOTE(security): this URI embeds a user name and password in source
    # control. It is kept only as a backward-compatible fallback; prefer
    # passing ``connection_string`` or setting the environment variable.
    DEFAULT_CONNECTION_STRING = "mongodb://netdb:netdb3230!@10.255.93.173:27017/"
    CONNECTION_STRING_ENV_VAR = "MONGO_CONNECTION_STRING"
    # Database that holds every collection read by this class.
    DATABASE_NAME = "TestAPI"

    def __init__(self, connection_string=None):
        """Open a client connection.

        Args:
            connection_string: MongoDB URI. When ``None`` the environment
                variable / built-in default described on the class is used.
        """
        import os  # local import so the class does not depend on file-level imports

        if connection_string is None:
            connection_string = os.environ.get(
                self.CONNECTION_STRING_ENV_VAR, self.DEFAULT_CONNECTION_STRING
            )
        self.client = MongoClient(connection_string)

    def _fetch_data(self, collection_name, limit=None):
        """Load every document of a collection (optionally capped) into a DataFrame.

        Args:
            collection_name: Name of the collection inside ``DATABASE_NAME``.
            limit: Maximum number of documents to read. ``None`` or ``0``
                means "no limit".

        Returns:
            A ``pandas.DataFrame`` with one row per document, or ``None`` when
            anything goes wrong (the error is printed, not raised), so callers
            must check for ``None`` before indexing the result.
        """
        try:
            collection = self.client[self.DATABASE_NAME][collection_name]
            cursor = collection.find({})
            if limit:
                cursor = cursor.limit(limit)
            return pd.DataFrame(list(cursor))
        except Exception as e:
            # Deliberate best-effort: report the failure and let the caller decide.
            print(f"Error while fetching data from {collection_name}: {e}")
            return None

    def get_environment(self, limit=None):
        """Return the ``GH1`` collection as a DataFrame (``None`` on failure)."""
        return self._fetch_data("GH1", limit)

    def get_growth(self, limit=None):
        """Return the ``hydroponics_length1`` collection as a DataFrame (``None`` on failure)."""
        return self._fetch_data("hydroponics_length1", limit)
    
def create_dataset(X, y, look_back=1):
    """Cut a 2-D feature array into sliding windows paired with the next target.

    Window ``k`` contains rows ``k`` through ``k + look_back - 1`` of ``X``
    (all columns); its target is ``y[k + look_back]``, i.e. the value that
    immediately follows the window.

    Args:
        X: 2-D array indexed as ``X[rows, columns]``.
        y: Sequence of targets aligned row-for-row with ``X``.
        look_back: Number of consecutive rows per window.

    Returns:
        ``(windows, targets)`` as NumPy arrays with ``len(X) - look_back``
        entries each; both are empty when ``X`` has ``look_back`` rows or fewer.
    """
    window_starts = range(len(X) - look_back)
    windows = [X[start:start + look_back, :] for start in window_starts]
    targets = [y[start + look_back] for start in window_starts]
    return np.array(windows), np.array(targets)

# One client is shared by both collection reads below.
db = MongoDatabase()

# Target (Y): keep only the growth-length column, still as a one-column DataFrame.
# get_growth() returns None on a fetch error, in which case the next line fails.
growth_data_1 = db.get_growth()
growth_column = ['growth length   (cm)']
growth_data_2 = growth_data_1[growth_column]

# Features (X): read at most 31200 environment documents and keep temp/humidity.
environment_data_1 = db.get_environment(limit=31200)
environment_data_2 = environment_data_1[['temp', 'humidity']]

# Average every run of 100 consecutive index values into a single row
# (31200 rows -> at most 312 averaged rows), then renumber the rows from 0.
bucket_ids = environment_data_2.index // 100
environment_averaged = (
    environment_data_2
    .groupby(bucket_ids)
    .mean(numeric_only=True)
    .reset_index(drop=True)
)

# Join features and target on the row index; only index labels present in
# both frames survive. Column order is temp, humidity, growth length.
training_data = environment_averaged.merge(
    growth_data_2, left_index=True, right_index=True
)

# Scale every column to [0, 1].
# NOTE(review): the scaler is fit on all rows before the split further down,
# so validation/test rows contribute to the min/max used for scaling.
scaler = MinMaxScaler()
data_normalized = scaler.fit_transform(training_data)

# The last column is the target; everything before it is a feature.
X_data, y_data = data_normalized[:, :-1], data_normalized[:, -1]

# Each sample is a window of `look_back` consecutive rows.
look_back = 24
X, Y = create_dataset(X_data, y_data, look_back)

# Ordered (unshuffled) split: first 80% train, then the remainder is halved
# into validation and test.
X_train, X_temp, Y_train, Y_temp = train_test_split(
    X, Y, test_size=0.2, shuffle=False
)
X_val, X_test, Y_val, Y_test = train_test_split(
    X_temp, Y_temp, test_size=0.5, shuffle=False
)

def create_lstm_model(
    units=64,
    activation='relu',
    recurrent_activation='sigmoid',
    use_bias=True,
    kernel_initializer='glorot_uniform',
    recurrent_initializer='orthogonal',
    bias_initializer='zeros',
    unit_forget_bias=True,
    kernel_regularizer=None,
    recurrent_regularizer=None,
    bias_regularizer=None,
    activity_regularizer=None,
    kernel_constraint=None,
    recurrent_constraint=None,
    bias_constraint=None,
    dropout=0.0,
    recurrent_dropout=0.0,
    return_sequences=False,
    return_state=False,
    go_backwards=False,
    stateful=None,
    unroll=None,
):
    """Build and compile a one-layer LSTM regressor.

    The network is a single ``LSTM`` layer followed by ``Dense(1)``. Every
    argument is forwarded unchanged to the ``LSTM`` constructor under the same
    name (note that ``stateful`` and ``unroll`` default to ``None`` here and
    are passed through as such).

    The input shape is ``(look_back, 2)``: ``look_back`` is read from the
    module-level variable at call time and the feature count is fixed at 2.

    Returns:
        A compiled ``keras.Sequential`` model using the Adam optimizer and
        mean-squared-error loss, tracking MSE, RMSE and MAE metrics.
    """
    # Gather the pass-through options once so the layer call stays short.
    lstm_options = {
        'units': units,
        'activation': activation,
        'recurrent_activation': recurrent_activation,
        'use_bias': use_bias,
        'kernel_initializer': kernel_initializer,
        'recurrent_initializer': recurrent_initializer,
        'bias_initializer': bias_initializer,
        'unit_forget_bias': unit_forget_bias,
        'kernel_regularizer': kernel_regularizer,
        'recurrent_regularizer': recurrent_regularizer,
        'bias_regularizer': bias_regularizer,
        'activity_regularizer': activity_regularizer,
        'kernel_constraint': kernel_constraint,
        'recurrent_constraint': recurrent_constraint,
        'bias_constraint': bias_constraint,
        'dropout': dropout,
        'recurrent_dropout': recurrent_dropout,
        'return_sequences': return_sequences,
        'return_state': return_state,
        'go_backwards': go_backwards,
        'stateful': stateful,
        'unroll': unroll,
    }
    recurrent_layer = LSTM(input_shape=(look_back, 2), **lstm_options)

    model = keras.Sequential()
    model.add(recurrent_layer)
    model.add(Dense(1))  # single regression output

    tracked_metrics = [
        keras.metrics.MeanSquaredError(),
        keras.metrics.RootMeanSquaredError(),
        keras.metrics.MeanAbsoluteError(),
    ]
    model.compile(
        optimizer="adam",
        loss='mean_squared_error',
        metrics=tracked_metrics,
    )
    return model

# Expose the model builder through the scikit-learn estimator interface so it
# can be driven by GridSearchCV.
lstm_regressor = KerasRegressor(build_fn=create_lstm_model, verbose=0)

# Hyper-parameter grid: 2 * 1 * 2 * 1 * 1 * 2 * 2 = 16 combinations.
# The trailing notes are the original author's; they appear to record earlier
# searches as "values tried -> value kept" (TODO: confirm with the author).
param_grid = {
    'units': [64, 128],  # 64, 128 -> 128
    'epochs': [10],  # 10
    'batch_size': [32, 64],  # 32, 64 -> 32
    'activation': ['sigmoid'],  # relu, tanh, sigmoid, linear, swish -> sigmoid
    'recurrent_activation': ['sigmoid'],  # relu, tanh, sigmoid, linear, swish -> sigmoid
    'dropout': [0.1, 0.4],  # 1 0.0, 0.5 -> 0.3, 0.0, 0.2 -> 0.0 0.2 -> 0.0 / #2 0.0, 0.5, 1.0 -> 0.0, 0.5 -> 0.0, 0.1, 0.2, 0.3, 0.4, 0.5 -> 0.1, 0.3, 0.2
    'recurrent_dropout': [0.0, 0.1],  # 2 0.0, 0.5, 1.0 ->
}

# No `cv` or `scoring` is given, so scikit-learn's defaults apply and each
# candidate is ranked by the wrapped estimator's own score.
grid_search = GridSearchCV(estimator=lstm_regressor, param_grid=param_grid)
grid_result = grid_search.fit(X_train, Y_train)

# Report the winning configuration.
best_score = grid_result.best_score_
best_params = grid_result.best_params_
print(f"Best: {best_score:f} using {best_params}")

# Tabulate all candidates and show the ten with the highest mean test score.
results = pd.DataFrame(grid_result.cv_results_)
top_10_results = results.nlargest(10, 'mean_test_score')

print(top_10_results)

  lstm_regressor = KerasRegressor(build_fn=create_lstm_model, verbose = 0)


Best: -0.014701 using {'activation': 'sigmoid', 'batch_size': 64, 'dropout': 0.1, 'epochs': 10, 'recurrent_activation': 'sigmoid', 'recurrent_dropout': 0.1, 'units': 64}
    mean_fit_time  std_fit_time  mean_score_time  std_score_time  \
10       2.739567      0.211039         0.284349        0.012464   
7        3.674648      0.209412         0.299025        0.016248   
11       3.390584      0.270059         0.417442        0.240829   
1        2.018337      0.076179         0.223014        0.006258   
0        1.973951      0.167244         0.236570        0.002920   
15       4.214486      0.547643         0.310380        0.015840   
2        2.693821      0.152021         0.357066        0.144442   
6        2.885514      0.169892         0.317006        0.050763   
4        1.867851      0.068646         0.234572        0.012431   
5        2.438116      0.357424         0.246225        0.007647   

   param_activation param_batch_size param_dropout param_epochs  \
10          si