In [15]:
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge, ElasticNet, SGDRegressor, BayesianRidge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error

# Read the full dataset from the Excel workbook into a DataFrame
file_path = "data_all_numerical_select_reduced.xlsx"
data = pd.read_excel(io=file_path)

# Define columns
# The feature names are assembled from their numbered families instead of being
# spelled out one per line; the resulting list and its order are unchanged.
data_columns = (
    [f'OF{i}' for i in range(2, 35)]        # OF2 .. OF34
    + ['OF37', 'OF38']                      # OF35 / OF36 are absent from the list
    + ['F1', 'F2']
    + [f'F3_{i}' for i in range(1, 8)]      # F3_1 .. F3_7
    + [f'F{i}' for i in range(4, 60)]       # F4 .. F59
    + [f'F{i}' for i in range(62, 69)]      # F62 .. F68 (F60 / F61 are absent)
    + [f'S{i}' for i in range(1, 7)]        # S1 .. S6
    + [
        'PC', 'FC', 'WRI', 'SVT', 'VCHWC', 'HWCC', 'MC',
        'PP', 'ST', 'SWP', 'DP', 'ADLM', 'ATDO', 'AOD',
    ]
)

# Name of the regression target column (only the first entry is used below)
results_columns = ['SR_Benefit']

# Assemble the feature matrix and the target vector from the loaded frame
y = data.loc[:, results_columns[0]]
X = data.loc[:, data_columns]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Load trained models and their weights
model_directory = "SR_Benefit"

# Maps a model name to a (fitted model, predictions on X_test) tuple
best_models = {}

# Load TensorFlow model
# Artifact paths are derived from model_directory so that changing the
# directory in one place relocates every file that is read here.
tensorflow_model_filename = os.path.join(model_directory, 'TensorFlow_model.h5')
model_tf = tf.keras.models.load_model(tensorflow_model_filename)
# NOTE(review): joblib.load unpickles arbitrary objects -- only load artifacts
# that come from a trusted source.
scaler_tf = joblib.load(os.path.join(model_directory, "scaler_tf.pkl"))

# Apply the saved scaler to the test data for the TensorFlow model
X_test_scaled_tf = scaler_tf.transform(X_test)

# Evaluate the TensorFlow model
y_pred_tf = model_tf.predict(X_test_scaled_tf)
# RMSE is computed explicitly as sqrt(MSE): the `squared=False` shortcut of
# mean_squared_error is deprecated since scikit-learn 1.4 and removed in 1.6.
rmse_tf = mean_squared_error(y_test, y_pred_tf) ** 0.5
print(f"TensorFlow RMSE: {rmse_tf}")

# Add TensorFlow model to best_models
best_models['TensorFlow'] = (model_tf, y_pred_tf)

# Load other models
# Sorted so the load order does not depend on the arbitrary order of os.listdir.
model_suffix = "_model.pkl"
sklearn_model_filenames = sorted(
    filename for filename in os.listdir(model_directory) if filename.endswith(model_suffix)
)

for filename in sklearn_model_filenames:
    # Strip only the suffix so names that contain underscores stay intact and
    # two different files can never collapse onto the same dictionary key.
    model_name = filename[:-len(model_suffix)]
    model = joblib.load(os.path.join(model_directory, filename))
    # NOTE(review): the unscaled X_test is passed here; this assumes each saved
    # estimator carries its own preprocessing -- confirm against the training code.
    y_pred = model.predict(X_test)
    best_models[model_name] = (model, y_pred)


# Plot expected vs. predicted results for each model separately and save
output_directory = "SR_Benefit/plots"
os.makedirs(output_directory, exist_ok=True)

# Bounds shared by the clipping step and the reference diagonal
plot_min, plot_max = -2, 10

for model_name, (_model, y_pred) in best_models.items():
    fig, ax = plt.subplots(figsize=(8, 6))

    # Clip (not drop) predictions to [plot_min, plot_max]: out-of-range values
    # are drawn on the boundary so a few extreme predictions do not flatten the
    # rest of the plot. ravel() flattens column-vector predictions to 1-D.
    y_pred_clipped = np.clip(np.ravel(y_pred), plot_min, plot_max)

    ax.scatter(y_test, y_pred_clipped, color='blue', s=5)
    # Dashed diagonal marks where predicted == actual
    ax.plot([plot_min, plot_max], [plot_min, plot_max], 'k--', lw=2)
    ax.set_xlabel('Actual')
    ax.set_ylabel('Predicted')
    ax.set_title(f'Expected vs. Predicted Results for {model_name}')
    fig.savefig(os.path.join(output_directory, f"{model_name}_{results_columns[0]}_plot.png"))
    # Close explicitly so figures do not accumulate in memory across the loop
    plt.close(fig)

TensorFlow RMSE: 380.45800944589445


In [7]:
# Display the raw TensorFlow predictions for inspection
# NOTE(review): this dumps the whole array; the recorded output below is 2-D
# (one value per row) and contains at least one extreme value (~3.14e+03).
y_pred_tf

array([[3.0079887e+00],
       [1.6681199e+00],
       [3.1417327e+03],
       [1.3144172e+00],
       [1.6939529e+00],
       [2.0848451e+00],
       [3.6738439e+00],
       [2.7706995e+00],
       [6.3966346e+00],
       [4.7162566e+00],
       [2.6356475e+00],
       [2.3142576e+00],
       [6.9823809e+00],
       [4.0483503e+00],
       [5.4879475e+00],
       [1.3872666e+00],
       [6.5830832e+00],
       [7.5398951e+00],
       [8.9697075e+00],
       [6.5440378e+00],
       [1.6596693e+00],
       [1.1229579e+00],
       [7.8981175e+00],
       [9.7625035e-01],
       [2.6979580e+00],
       [4.3481474e+00],
       [6.0581570e+00],
       [1.2434810e+00],
       [3.6130013e+00],
       [1.0560638e+00],
       [2.9308138e+00],
       [1.9292848e+00],
       [1.5598289e+00],
       [3.3669174e-01],
       [2.2224774e+00],
       [3.4400721e+00],
       [2.0002167e+00],
       [1.0037372e+01],
       [2.2248840e+00],
       [4.1136599e+00],
       [3.0538416e+00],
       [6.816268