In [205]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer
from sklearn.utils.multiclass import type_of_target

In [173]:
data = pd.read_csv('../data/merged_data.csv', index_col = 0)
time_step = len(data)
TARGET_VAR = "price_per_dozen"

X = np.array(data[['disaster_cost_adjusted', 'human_outbreaks_per_million', 'covid_hospitalization_per_million', 
          'infected_flock_cnt', 'infected_h5n1_people_cnt', 'gas_price_per_gallon', 'temp_variance']].copy())
X_scaled = MinMaxScaler(feature_range = (0,1)).fit_transform(X.reshape(-1, 1))
Y = data.loc[:, TARGET_VAR:TARGET_VAR]

In [175]:
def create_sequences(data, time_steps):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i:(i + time_steps), 0])
        y.append(data[i + time_steps, 0])
    return np.array(X), np.array(y)

X1, y1 = create_sequences(X_scaled, time_step)

In [211]:
def create_model(activation='linear',
                 batch_size = 1,
                 dropout_rate = 0.02):

    model = Sequential()
    model.add(LSTM(units = 32, activation=activation, input_shape=(X_scaled.shape)))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    optimizer = Adam(learning_rate=0.001)
    model.compile(loss = 'mean_squared_error', optimizer=optimizer)

    return model


estimator=KerasRegressor(build_fn=create_model, verbose=0, epochs = 10)

param_grid = {
    'batch_size':   [20, 40, 60, 80],
    'epochs': [10, 15, 20]
}


In [179]:
X1.shape

(3246, 541)

In [213]:
scoring = {
    "MSE": make_scorer(mean_squared_error, greater_is_better=False),
    "MAE": make_scorer(mean_absolute_error, greater_is_better=False),
    "R2": make_scorer(r2_score)
}

grid_search = GridSearchCV(estimator, param_grid, cv = 5, scoring = scoring, refit = "R2", n_jobs = -1, verbose = True)
grid_search.fit(X1, y1)

best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

Fitting 5 folds for each of 12 candidates, totalling 60 fits


 -0.00287497 -0.00289935 -0.00282886 -0.00283907 -0.00285945 -0.0028889 ]
 -0.01539273 -0.01753419 -0.01473687 -0.01410946 -0.0137565  -0.01629565]
 -0.19006194 -1.35881259 -0.19502836 -0.17918876 -0.08838469 -0.6768517 ]
  X, y = self._initialize(X, y)
  super().__init__(**kwargs)


In [216]:
r2_scores = cross_val_score(best_model, X1, y1, cv = 5, scoring = "r2")
mse_scores = -cross_val_score(best_model, X1, y1, cv = 5, scoring = "neg_mean_squared_error")
mae_scores = -cross_val_score(best_model, X1, y1, cv = 5, scoring = "neg_mean_absolute_error")

results_list = []



  X, y = self._initialize(X, y)
  super().__init__(**kwargs)
  X, y = self._initialize(X, y)
  super().__init__(**kwargs)
  X, y = self._initialize(X, y)
  super().__init__(**kwargs)
  X, y = self._initialize(X, y)
  super().__init__(**kwargs)
  X, y = self._initialize(X, y)
  super().__init__(**kwargs)
  X, y = self._initialize(X, y)
  super().__init__(**kwargs)
  X, y = self._initialize(X, y)
  super().__init__(**kwargs)
  X, y = self._initialize(X, y)
  super().__init__(**kwargs)
  X, y = self._initialize(X, y)
  super().__init__(**kwargs)
  X, y = self._initialize(X, y)
  super().__init__(**kwargs)
  X, y = self._initialize(X, y)
  super().__init__(**kwargs)
  X, y = self._initialize(X, y)
  super().__init__(**kwargs)
  X, y = self._initialize(X, y)
  super().__init__(**kwargs)
  X, y = self._initialize(X, y)
  super().__init__(**kwargs)
  X, y = self._initialize(X, y)
  super().__init__(**kwargs)


NameError: name 'name' is not defined

In [230]:
results_list.append({
    'Model': 'LSTM',
    'Best Params': best_params,
    'MAE': np.mean(mae_scores),
    'MAE Std': np.std(mae_scores),
    'RMSE': np.sqrt(np.mean(mse_scores)),
    'RMSE Std': np.std(np.sqrt(mse_scores)),
    'MSE': np.mean(mse_scores),
    'MSE Std': np.std(mse_scores),
    'R2': np.mean(r2_scores),
    'R2 Std': np.std(r2_scores)
        })

In [234]:
# Convert results to a DataFrame
df_results = pd.DataFrame(results_list)
df_results.head()

Unnamed: 0,Model,Best Params,MAE,MAE Std,RMSE,RMSE Std,MSE,MSE Std,R2,R2 Std
0,LSTM,"{'batch_size': 80, 'epochs': 15}",0.01385,0.007327,0.053306,0.031268,0.002842,0.002955,-0.189044,0.189709


In [None]:
from pathlib import Path

model_results_file_path = Path("./model_results")
df_results.to_csv(f'{model_results_file_path}/lstm_cv_result.csv')