<a href="https://colab.research.google.com/github/thegayankalinga/Software-Effort-Estimation-Model/blob/main/see_implementation_v5_p4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Phase 04: Hybrid Model Implementation

### Install required libraries

In [1]:
# Install required libraries if not installed
!pip install pandas numpy tensorflow keras xgboost matplotlib seaborn scikit-learn joblib --quiet

# Import necessary libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from xgboost import XGBRegressor
import joblib
import os
from sklearn.metrics import mean_squared_error, r2_score

# Define paths
DATA_PATH = "/content/drive/MyDrive/Projects/msc_project/results_data/"
MODELS_PATH = "/content/drive/MyDrive/Projects/msc_project/models/"
PERFORMANCE_DATA = "/content/drive/MyDrive/Projects/msc_project/performance_data/"

# Ensure directories exist
# os.makedirs(MODELS_PATH, exist_ok=True)
# os.makedirs(PERFORMANCE_DATA, exist_ok=True)

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')


ValueError: mount failed

In [None]:
print(tf.__version__)

### Load the Data & Models

In [38]:

from tensorflow.keras.models import load_model
import tensorflow as tf

# Load preprocessed datasets
X_test = pd.read_csv(os.path.join(DATA_PATH, "X_test.csv"))
y_test = pd.read_csv(os.path.join(DATA_PATH, "y_test.csv"))

# Convert to NumPy arrays for TensorFlow & XGBoost
X_test = X_test.to_numpy()
y_test = y_test.to_numpy()

# Reshape data for LSTM (LSTM requires 3D input: (samples, time_steps, features))
X_test_lstm = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# # Load trained models
# lstm_model = load_model(os.path.join(MODELS_PATH, "lstm_model.h5"), custom_objects={'mse': mse}) # Pass mse to custom_objects
# xgb_model = joblib.load(os.path.join(MODELS_PATH, "best_xgboost.pkl"))

#Load trained models
# lstm_model = load_model(os.path.join(MODELS_PATH, "lstm_model.keras"), compile=False)
# lstm_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
# lstm_model = load_model(os.path.join(MODELS_PATH, "lstm_model.keras"))
# mlp_model = load_model(os.path.join(MODELS_PATH, "mlp_model.keras"))

# When loading, recompile with the same optimizer configuration
loaded_lstm_model = load_model(os.path.join(MODELS_PATH, "lstm_model.keras"))
loaded_lstm_model.compile(
    optimizer='rmsprop',
    loss='mse',
    metrics=['mae']  # or whatever metrics you used originally
)

# lstm_model = load_model(os.path.join(MODELS_PATH, "lstm_model.keras"))
xgb_model = joblib.load(os.path.join(MODELS_PATH, "best_xgboost.pkl"))

print("\n✅ LSTM and XGBoost models successfully loaded.")


✅ LSTM and XGBoost models successfully loaded.


  saveable.load_own_variables(weights_store.get(inner_path))


### Hybrid Model - Stacking (Weighted Averaging)

In [39]:
# Generate predictions from LSTM
y_pred_lstm = lstm_model.predict(X_test_lstm)

# Generate predictions from XGBoost
y_pred_xgb = xgb_model.predict(X_test)

# Define weights for model stacking (tuned manually or via validation)
alpha = 0.7  # Weight for LSTM
beta = 0.3   # Weight for XGBoost

# Compute weighted average predictions
y_pred_hybrid = (alpha * y_pred_lstm) + (beta * y_pred_xgb)

# Evaluate hybrid model
def evaluate_model(y_true, y_pred, model_name):
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    mmre = np.mean(np.abs((y_true - y_pred) / (y_true + 1e-10)))

    print(f"\n📌 {model_name} Model Evaluation:")
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R-Squared: {r2:.4f}")
    print(f"MAPE: {mape:.2f}%")
    print(f"MMRE: {mmre:.4f}")

    return mse, rmse, r2, mape, mmre

# Evaluate Hybrid Stacking Model
hybrid_metrics = evaluate_model(y_test, y_pred_hybrid, "Hybrid Stacking (LSTM + XGBoost)")

[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step

📌 Hybrid Stacking (LSTM + XGBoost) Model Evaluation:
MSE: 0.0024
RMSE: 0.0495
R-Squared: 0.9975
MAPE: 92.72%
MMRE: 0.9272


### Hybrid Model - Feature Level Fusion

In [41]:
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import LSTM, Dense, Input
import numpy as np

# First, let's create a new functional model with the same architecture
input_shape = (1, X_test.shape[1])  # (timesteps, features)

# Create input layer
inputs = Input(shape=input_shape)

# Add LSTM layers
lstm_1 = LSTM(128, activation='relu', return_sequences=True)(inputs)
lstm_2 = LSTM(64, activation='relu')(lstm_1)
outputs = Dense(y_test.shape[1], activation='linear')(lstm_2)

# Create the full model
full_model = Model(inputs=inputs, outputs=outputs)

# Load the weights from your saved model
full_model.load_weights(os.path.join(MODELS_PATH, "lstm_model.keras"))

# Print layers to verify
print("Model layers:")
for i, layer in enumerate(full_model.layers):
    print(f"Layer {i}: {layer.name} ({layer.__class__.__name__})")

# Now create the feature extractor model using index
# Get the second LSTM layer (index 2)
lstm_feature_extractor = Model(inputs=full_model.input, outputs=full_model.layers[2].output)

# Extract LSTM features
X_test_lstm_features = lstm_feature_extractor.predict(X_test_lstm)
print("\n✅ Successfully extracted LSTM features.")
print(f"Extracted features shape: {X_test_lstm_features.shape}")

# Reshape if needed (if the output is 3D, flatten it to 2D)
if len(X_test_lstm_features.shape) == 3:
    X_test_lstm_features = X_test_lstm_features.reshape(X_test_lstm_features.shape[0], -1)
    print(f"Reshaped features shape: {X_test_lstm_features.shape}")

# Concatenate original features with LSTM extracted features
X_test_fusion = np.concatenate((X_test, X_test_lstm_features), axis=1)
print(f"Final fusion features shape: {X_test_fusion.shape}")

# Retrain XGBoost on new feature set
xgb_fusion_model = XGBRegressor(objective="reg:squarederror", n_estimators=100, random_state=42)
xgb_fusion_model.fit(X_test_fusion, y_test)

# Generate predictions
y_pred_fusion = xgb_fusion_model.predict(X_test_fusion)

# Evaluate Feature-Level Fusion Model
fusion_metrics = evaluate_model(y_test, y_pred_fusion, "Feature-Level Fusion (LSTM Features + XGBoost)")

Model layers:
Layer 0: input_layer_6 (InputLayer)
Layer 1: lstm_12 (LSTM)
Layer 2: lstm_13 (LSTM)
Layer 3: dense_6 (Dense)
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step

✅ Successfully extracted LSTM features.
Extracted features shape: (7500, 64)
Final fusion features shape: (7500, 105)

📌 Feature-Level Fusion (LSTM Features + XGBoost) Model Evaluation:
MSE: 0.0004
RMSE: 0.0188
R-Squared: 0.9996
MAPE: 54.03%
MMRE: 0.5403


### Save Performance Data & Best Hybrid Model

In [43]:
# Store performance data

import joblib
import os

best_model_path = os.path.join(MODELS_PATH, "best_hybrid_model.pkl")

performance_data = {
    "Model": ["Hybrid Stacking (LSTM + XGBoost)", "Feature-Level Fusion (LSTM Features + XGBoost)"],
    "MSE": [hybrid_metrics[0], fusion_metrics[0]],
    "RMSE": [hybrid_metrics[1], fusion_metrics[1]],
    "R-Squared": [hybrid_metrics[2], fusion_metrics[2]],
    "MAPE (%)": [hybrid_metrics[3], fusion_metrics[3]],
    "MMRE": [hybrid_metrics[4], fusion_metrics[4]]
}

# Convert to DataFrame
performance_df = pd.DataFrame(performance_data)

# Save to CSV
performance_csv_path = os.path.join(PERFORMANCE_DATA, "hybrid_model_performance.csv")
performance_df.to_csv(performance_csv_path, index=False)

print(f"\n✅ Hybrid model performance data saved to {performance_csv_path}")

# Save the best hybrid model
if hybrid_metrics[0] < fusion_metrics[0]:  # Choose model with lower MSE
    best_hybrid_model = "Hybrid Stacking"
    joblib.dump(y_pred_hybrid, best_model_path)
else:
    best_hybrid_model = "Feature-Level Fusion"
    joblib.dump(xgb_fusion_model, best_model_path)  # Ensure we're saving the trained model, not predictions
    # joblib.dump(y_pred_fusion, best_model_path)


print(f"\n✅ Best Hybrid Model Saved: {best_hybrid_model}")


✅ Hybrid model performance data saved to /content/drive/MyDrive/Projects/msc_project/performance_data/hybrid_model_performance.csv

✅ Best Hybrid Model Saved: Feature-Level Fusion
