SIMPLE MODELS COMBINED WITH LSTM AND SARIMAX

In [None]:
# CNN Combinations

import numpy as np
import pandas as pd
import joblib
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Conv1D
from statsmodels.tsa.statespace.sarimax import SARIMAX


# Function to evaluate models
def evaluate_model(y_true, y_pred, model_name):
    """Print regression metrics for one model and plot actual vs. predicted.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values, aligned element-wise with ``y_true``.
        model_name: Label used in the printed summary and in the plot title.

    Returns:
        The R² score of ``y_pred`` against ``y_true``.
    """
    # Take the square root of the MSE explicitly instead of passing
    # ``squared=False``: that keyword is not accepted by recent scikit-learn
    # releases, whereas this form works on old and new versions alike.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    print(f"{model_name} - RMSE: {rmse:.4f}, MAE: {mae:.4f}, R²: {r2:.4f}")

    # Scatter of predictions against ground truth for a quick visual check.
    plt.scatter(y_true, y_pred, alpha=0.5)
    plt.xlabel("Actual")
    plt.ylabel("Predicted")
    plt.title(f"{model_name} - Actual vs. Predicted")
    plt.show()

    return r2

# Candidate regressors, keyed by the label shown in printed metrics and plot titles.
models = {}
models["Linear Regression"] = LinearRegression()
models["Random Forest"] = RandomForestRegressor(n_estimators=100, random_state=42)
models["XGBoost"] = XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
)

# Running winner by test-set R²; starting at -inf lets any finite score replace it.
best_model, best_score = None, -np.inf

for name in models:
    model = models[name]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    r2 = evaluate_model(y_test, y_pred, name)

    # Strict comparison: on a tie the earlier candidate remains the winner.
    if not (r2 > best_score):
        continue
    best_score, best_model = r2, model

# Persist whichever classical model scored highest.
joblib.dump(best_model, "best_model.joblib")

# LSTM model
# Insert a length-1 middle axis so each row is fed as a one-step sequence:
# (rows, features) -> (rows, 1, features).
train_rows, train_cols = X_train.shape
X_train_reshaped = X_train.values.reshape(train_rows, 1, train_cols)
test_rows, test_cols = X_test.shape
X_test_reshaped = X_test.values.reshape(test_rows, 1, test_cols)

# Single 64-unit LSTM layer followed by a one-unit regression head.
lstm_model = Sequential()
lstm_model.add(LSTM(64, return_sequences=False, input_shape=(1, train_cols)))
lstm_model.add(Dense(1))
lstm_model.compile(loss="mse", optimizer="adam")
lstm_model.fit(X_train_reshaped, y_train, batch_size=16, epochs=50, verbose=0)

# The network output has a trailing axis; flatten it to 1-D before scoring.
lstm_test_output = lstm_model.predict(X_test_reshaped)
y_pred_lstm = lstm_test_output.flatten()
r2_lstm = evaluate_model(y_test, y_pred_lstm, "LSTM")

# CNN-LSTM model
# A width-1 convolution feeds a 64-unit LSTM, followed by a one-unit regression head.
# Trains and predicts on the (rows, 1, features) arrays prepared for the LSTM above.
cnn_input_shape = (1, X_train.shape[1])
cnn_lstm = Sequential()
cnn_lstm.add(Conv1D(64, kernel_size=1, activation='relu', input_shape=cnn_input_shape))
cnn_lstm.add(LSTM(64, return_sequences=False))
cnn_lstm.add(Dense(1))
cnn_lstm.compile(loss='mse', optimizer='adam')
cnn_lstm.fit(X_train_reshaped, y_train, batch_size=16, epochs=50, verbose=0)

# Flatten the network output to 1-D before scoring.
cnn_lstm_test_output = cnn_lstm.predict(X_test_reshaped)
y_pred_cnn_lstm = cnn_lstm_test_output.flatten()
r2_cnn_lstm = evaluate_model(y_test, y_pred_cnn_lstm, "CNN + LSTM")

# SARIMAX model (only meaningful for time-series data)
# Order (1, 1, 1) with a (1, 1, 1) seasonal part of period 12; the feature
# columns are supplied as exogenous regressors.
# NOTE(review): assumes the training rows are in chronological order -- confirm
# that the upstream train/test split does not shuffle.
sarimax_model = SARIMAX(
    y_train,
    exog=X_train,
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 12),
).fit()

# Predict the len(y_test) steps that immediately follow the training sample.
forecast_start = len(y_train)
forecast_end = forecast_start + len(y_test) - 1
y_pred_sarimax = sarimax_model.predict(start=forecast_start, end=forecast_end, exog=X_test)
r2_sarimax = evaluate_model(y_test, y_pred_sarimax, "SARIMAX")

# Pick the overall winner by test-set R² across every model family.
r2_scores = {
    "Best ML Model": best_score,
    "LSTM": r2_lstm,
    "CNN-LSTM": r2_cnn_lstm,
    "SARIMAX": r2_sarimax,
}

# On a tie, the entry listed first in r2_scores wins.
final_best_model_name = max(r2_scores.items(), key=lambda entry: entry[1])[0]

# Keras networks are written as HDF5; everything else goes through joblib.
# NOTE(review): an earlier step in this script already wrote best_model.joblib,
# and that file stays on disk even when a Keras model wins here.
keras_candidates = {"LSTM": lstm_model, "CNN-LSTM": cnn_lstm}
if final_best_model_name in keras_candidates:
    keras_candidates[final_best_model_name].save("best_model.h5")
else:
    joblib_target = sarimax_model if final_best_model_name == "SARIMAX" else best_model
    joblib.dump(joblib_target, "best_model.joblib")

print(f"Best Model: {final_best_model_name}")