In [3]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import joblib
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np
import os
from sklearn.ensemble import GradientBoostingRegressor
import matplotlib.pyplot as plt
from collections import Counter

# Configuration for all functions
# One entry per target column (the "function" being evaluated). The keys of
# this dict drive the evaluation loop at the bottom of the cell.
#   min / max      -> bounds handed to normalize() to rescale values to 0-10
#   lower / higher -> thresholds handed to classify() to bucket a value as
#                     "lower" / "moderate" / "higher"
# NOTE(review): lower/higher are applied to whatever scale the values are on
# at classification time (normalized or not, depending on normalize_data);
# confirm that is intended for the non-normalized runs.
config = {
    "WS": {"min": 1.58, "max": 8.61, "lower": 3.07, "higher": 6.17},
    "PR": {"min": 2.07, "max": 10.0, "lower": 3.66, "higher": 6.11},
    "NR": {"min": 4.10, "max": 10.0, "lower": 2.06, "higher": 4.42},
    "SR": {"min": 2.29, "max": 10.0, "lower": 3.02, "higher": 6.67},
    "SFST": {"min": 0.0, "max": 7.71, "lower": 1.05, "higher": 6.51},
    "WS_Benefit": {"min": 0.08, "max": 10.0, "lower": 2.65, "higher": 6.50},
    "PR_Benefit": {"min": 0.49, "max": 10.0, "lower": 3.29, "higher": 6.68},
    "NR_Benefit": {"min": 0.71, "max": 10.0, "lower": 4.10, "higher": 7.76},
    "SR_Benefit": {"min": 0.49, "max": 8.79, "lower": 2.94, "higher": 6.19},
    "SFST_Benefit": {"min": 0.0, "max": 7.19, "lower": 1.86, "higher": 5.30}
}

# Define feature sets for each function
# Maps each target column name to the list of CSV columns used as model
# inputs (X) when that target is evaluated. Looked up by
# evaluate_models_for_function() using the same keys as `config`.
feature_selection = {
    'PR': ['OF22', 'OF26', 'OF27', 'F17', 'F20', 'F21', 'F23', 'F24', 'F28', 'F29', 'F31', 'F33', 'F34', 'F35', 'F36', 'F38', 'F43', 'F44', 'F45', 'F49', 'F63'],
    'NR': ['OF16', 'OF18', 'OF22', 'OF25', 'OF26', 'OF27', 'F1', 'F3_a', 'F3_b', 'F3_c', 'F3_d', 'F3_e', 'F3_f', 'F3_g', 'F6', 'F17', 'F18', 'F20', 'F21', 'F22', 'F23', 'F24', 'F28', 'F31', 'F33', 'F34', 'F36', 'F43', 'F44', 'F45', 'F48', 'F49', 'F54', 'S5'],
    'SR': ['OF22', 'OF26', 'OF27', 'F9', 'F17', 'F20', 'F22', 'F28', 'F29', 'F31', 'F33', 'F34', 'F35', 'F36', 'F43', 'F44', 'F45', 'F49', 'S5'],
    'WS': ['OF22', 'OF26', 'F3_a', 'F3_b', 'F3_c', 'F3_d', 'F3_e', 'F3_f', 'F3_g', 'F20', 'F21', 'F22', 'F28', 'F31', 'F43', 'F44', 'F45', 'F48', 'F49'],
    'SFST': ['F1', 'F3_a', 'F3_b', 'F3_c', 'F3_d', 'F3_e', 'F3_f', 'F3_g', 'F14', 'F17', 'F21', 'F24', 'F25', 'F29', 'F31', 'F33', 'F34', 'F43', 'F47', 'F48'],
    'PR_Benefit': ['OF18', 'OF19', 'OF20', 'OF21', 'OF22', 'OF23', 'OF24', 'F41', 'F48', 'F50', 'F52'],
    'NR_Benefit': ['OF9', 'OF10', 'OF11', 'OF19', 'OF20', 'OF21', 'OF22', 'OF23', 'OF24', 'F13', 'F41', 'F50', 'F51', 'F52'],
    'SR_Benefit': ['OF18', 'OF19', 'OF20', 'OF21', 'OF22', 'OF23', 'OF24', 'F24', 'F28', 'F41', 'F50', 'F52', 'F55', 'S4'],
    'WS_Benefit': ['OF8', 'OF17', 'OF18', 'OF23', 'OF24', 'F51'],
    'SFST_Benefit': ['OF18', 'OF22', 'OF25', 'OF27', 'OF28', 'F50']
}

# Function to load, evaluate, and retrain the model
def load_and_train_model(model_file, model_name, hyperparameters, column, data_columns):
    """Load a saved model, rebuild its hold-out split and predict on it.

    The source CSV is located from the model file name: the last two
    '_'-separated pieces of the base name are dropped and the remainder is
    looked up under ``../../../Data_ML/4_out_csvs_regression``.

    Args:
        model_file: Path of the serialized model, read with ``joblib.load``.
        model_name: Model type label; ``'GradientBoostingRegressor'``
            triggers a refit (see below).
        hyperparameters: Keyword arguments forwarded to
            ``GradientBoostingRegressor`` when refitting.
        column: Name of the target column in the CSV.
        data_columns: Names of the feature columns in the CSV.

    Returns:
        Tuple ``(model, X_test_scaled, y_test, y_pred, csv_file)``.
    """
    # NOTE(review): joblib.load unpickles the file; only use trusted model files.
    estimator = joblib.load(model_file)

    # Derive the CSV name from the model file name.
    name_parts = os.path.basename(model_file).split('_')
    base_csv_name = '_'.join(name_parts[:-2])
    csv_file = os.path.join('../../../Data_ML/4_out_csvs_regression', base_csv_name)

    frame = pd.read_csv(csv_file)
    features, target = frame[data_columns], frame[column]

    # Fixed seed, 10% hold-out.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.1, random_state=42
    )

    # The scaler is fitted here on the training split only, then reused for
    # the test split.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # For gradient boosting the loaded estimator is discarded and replaced by
    # one fitted from scratch on the scaled training split.
    if model_name == 'GradientBoostingRegressor':
        estimator = GradientBoostingRegressor(loss='squared_error', **hyperparameters)
        estimator.fit(X_train_scaled, y_train)

    y_pred = estimator.predict(X_test_scaled)
    return estimator, X_test_scaled, y_test, y_pred, csv_file

def normalize(values, min_val, max_val):
    """Linearly rescale ``values`` so that min_val maps to 0 and max_val to 10.

    Works on scalars and on anything supporting element-wise arithmetic.
    No guard is applied when ``max_val == min_val``.
    """
    span = max_val - min_val
    fraction = (values - min_val) / span
    return 10 * fraction

def classify(value, lower, higher):
    """Bucket ``value`` into "lower", "moderate" or "higher".

    Values strictly below ``lower`` are "lower"; values up to and including
    ``higher`` are "moderate"; everything else is "higher".
    """
    if value < lower:
        return "lower"
    if value <= higher:
        return "moderate"
    return "higher"

def calculate_accuracy(actual, predicted):
    """Return the fraction of positions where ``actual`` and ``predicted`` agree.

    Args:
        actual: Sized sequence of true labels (list, tuple, array, Series...).
        predicted: Sequence of predicted labels, compared position by position.

    Returns:
        Matches divided by ``len(actual)``, or 0 when ``actual`` is empty.
        Pairs are formed with ``zip``, so positions beyond the shorter input
        are not counted as matches.
    """
    # Test emptiness via len() rather than truthiness: array-like inputs
    # raise on bool() when they hold more than one element.
    total = len(actual)
    if total == 0:
        return 0
    matches = sum(1 for a, p in zip(actual, predicted) if a == p)
    return matches / total

def voting_classification(predictions):
    """Majority-vote across several prediction lists, position by position.

    ``predictions`` is a sequence of equally ordered label sequences (one per
    model). For each position the most frequent label wins; the winner is
    whatever ``Counter.most_common(1)`` reports for that position.
    """
    winners = []
    for ballot in zip(*predictions):
        (label, _count), = Counter(ballot).most_common(1)
        winners.append(label)
    return winners

# Function to evaluate models for a given function
def evaluate_models_for_function(function, params, base_dir, normalize_data=True):
    """Evaluate every ``.pkl`` model stored for one target column.

    Args:
        function: Target column name; also the sub-directory of ``base_dir``
            holding the models and the key into ``feature_selection``.
        params: Dict with ``min``/``max`` (normalization bounds) and
            ``lower``/``higher`` (classification thresholds).
        base_dir: Directory containing one sub-directory per target.
        normalize_data: When true, actual and predicted values are rescaled
            with ``normalize`` before being classified.

    Returns:
        List of ``(csv_file, model_name, accuracy, classified_actual,
        classified_predicted)`` tuples, one per model file, in sorted
        file-name order.
    """
    model_directory = f"{base_dir}/{function}"
    data_columns = feature_selection[function]  # Use specific feature set for the function

    def to_classes(values):
        # Optionally rescale, then bucket each value with the thresholds.
        if normalize_data:
            values = normalize(values, params['min'], params['max'])
        return [classify(val, params['lower'], params['higher']) for val in values]

    results = []
    # os.listdir returns entries in arbitrary order; sort so that the order of
    # results (and therefore any later tie-break between equally accurate
    # models) is reproducible across runs and platforms.
    for model_file in sorted(os.listdir(model_directory)):
        if not model_file.endswith(".pkl"):
            continue

        # The model type is the second-to-last '_'-separated part of the name.
        model_name = model_file.split('_')[-2]
        hyperparameters = {}  # No hyperparameters provided when loading from file
        model, X_test_scaled, y_test, y_pred, csv_file = load_and_train_model(
            os.path.join(model_directory, model_file),
            model_name,
            hyperparameters,
            function,
            data_columns,
        )
        if model is None:
            continue

        classified_actual = to_classes(y_test)
        classified_predicted = to_classes(y_pred)

        accuracy = calculate_accuracy(classified_actual, classified_predicted)
        results.append((csv_file, model_name, accuracy, classified_actual, classified_predicted))

    return results

def print_results(function, results, label):
    """Print the best model's accuracy plus top-5 and all-model voting accuracy.

    Args:
        function: Target column name, used only in the printed headers.
        results: List of ``(csv_file, model_name, accuracy, classified_actual,
            classified_predicted)`` tuples. It is sorted IN PLACE by accuracy,
            best first.
        label: Free-text tag appended to the printed headers.

    Nothing is printed when ``results`` is empty. The actual labels of the
    best-ranked result are used as the reference for both vote evaluations.
    """
    results.sort(key=lambda entry: entry[2], reverse=True)
    if not results:
        return

    categories = ["lower", "moderate", "higher"]

    def ratio(hits, total):
        # Share of hits, defined as 0 for an empty denominator.
        return hits / total if total > 0 else 0

    def per_class(reference, votes):
        # Per-category share of reference items whose vote matches them.
        shares = {}
        for name in categories:
            support = reference.count(name)
            if support > 0:
                hits = sum(1 for a, p in zip(reference, votes) if a == p == name)
                shares[name] = hits / support
            else:
                shares[name] = 0
        return shares

    # ---- Best single model -------------------------------------------------
    csv_file, model_name, accuracy, classified_actual, classified_predicted = results[0]

    global_correct = dict.fromkeys(categories, 0)
    global_total = dict.fromkeys(categories, 0)
    for actual, predicted in zip(classified_actual, classified_predicted):
        global_total[actual] += 1
        if actual == predicted:
            global_correct[actual] += 1

    overall_accuracy = ratio(sum(global_correct.values()), sum(global_total.values()))

    print(f"\n{function} Top Result ({label}):")
    print(f"CSV File: {os.path.basename(csv_file)}, Model: {model_name}, Accuracy: {accuracy:.2%}")
    print(f"Global Accuracy: {overall_accuracy:.2%}")
    for category in categories:
        category_accuracy = ratio(global_correct[category], global_total[category])
        print(f"Accuracy for '{category}': {category_accuracy:.2%}")

    # ---- Vote among the five best models ------------------------------------
    top_5_models = results[:5]
    ensemble_actual = top_5_models[0][3]
    ensemble_votes = voting_classification([entry[4] for entry in top_5_models])
    ensemble_accuracy = calculate_accuracy(ensemble_actual, ensemble_votes)

    # ---- Vote among every model ---------------------------------------------
    all_actual = results[0][3]
    voting_votes = voting_classification([entry[4] for entry in results])
    voting_accuracy = calculate_accuracy(all_actual, voting_votes)

    ensemble_category_accuracy = per_class(ensemble_actual, ensemble_votes)
    voting_category_accuracy = per_class(all_actual, voting_votes)

    print(f"\n{function} Ensemble and Voting Results ({label}):")
    print(f"Overall Ensemble Accuracy: {ensemble_accuracy:.2%}")
    print(f"Overall Voting Accuracy: {voting_accuracy:.2%}")
    for category in categories:
        print(f"Ensemble Accuracy for '{category}': {ensemble_category_accuracy[category]:.2%}")
        print(f"Voting Accuracy for '{category}': {voting_category_accuracy[category]:.2%}")

# Evaluate models for each function in the config
# Three passes per target:
#   1. models under ../TrainingResults/norm, classified as-is;
#   2. models under ../TrainingResults/non_norm, values rescaled with
#      normalize() before classification;
#   3. models under ../TrainingResults/non_norm, classified as-is.
for function, params in config.items():
    print(f"\nEvaluating Pre-Normalized Models for {function}...")
    results_norm = evaluate_models_for_function(function, params, "../TrainingResults/norm", normalize_data=False)
    print_results(function, results_norm, "Pre-Normalized")

    print(f"\nEvaluating Normalized Models (from Non-Norm Data) for {function}...")
    results_non_norm_normalized = evaluate_models_for_function(function, params, "../TrainingResults/non_norm", normalize_data=True)
    print_results(function, results_non_norm_normalized, "Normalized from Non-Normalized Data")

    # This pass does NOT normalize; the progress message previously repeated
    # the "Normalized Models" text of the pass above.
    # The variable name is reused (kept for compatibility with later cells),
    # so after this line it holds the non-normalized results.
    print(f"\nEvaluating Non-Normalized Models for {function}...")
    results_non_norm_normalized = evaluate_models_for_function(function, params, "../TrainingResults/non_norm", normalize_data=False)
    print_results(function, results_non_norm_normalized, "Non-Normalized Data")



Evaluating Pre-Normalized Models for WS...

WS Top Result (Pre-Normalized):
CSV File: output_bfill_imputed.csv, Model: GradientBoostingRegressor, Accuracy: 100.00%
Global Accuracy: 100.00%
Accuracy for 'lower': 100.00%
Accuracy for 'moderate': 100.00%
Accuracy for 'higher': 100.00%

WS Ensemble and Voting Results (Pre-Normalized):
Overall Ensemble Accuracy: 100.00%
Overall Voting Accuracy: 33.33%
Ensemble Accuracy for 'lower': 100.00%
Voting Accuracy for 'lower': 100.00%
Ensemble Accuracy for 'moderate': 100.00%
Voting Accuracy for 'moderate': 0.00%
Ensemble Accuracy for 'higher': 100.00%
Voting Accuracy for 'higher': 0.00%

Evaluating Normalized Models (from Non-Norm Data) for WS...

WS Top Result (Normalized from Non-Normalized Data):
CSV File: output_bfill_imputed.csv, Model: GradientBoostingRegressor, Accuracy: 100.00%
Global Accuracy: 100.00%
Accuracy for 'lower': 100.00%
Accuracy for 'moderate': 100.00%
Accuracy for 'higher': 100.00%

WS Ensemble and Voting Results (Normalized f