In [1]:
import pandas as pd
import os
import glob
from sklearn.metrics import mean_squared_error

# Feature folders to scan; each name is joined onto the base directory below
features = [
    'NR', 'PR', 'SR', 'SFST', 'WS',
    'NR_Benefit', 'PR_Benefit', 'SR_Benefit', 'SFST_Benefit', 'WS_Benefit',
]

# Root directory that holds every per-feature CSV folder
base_directory = './csvs/'

# Running record of the lowest-MSE result seen across all features.
# The MSE starts at +inf so that the first real value always replaces it.
overall_best_mse = float('inf')
overall_best_model = overall_best_dataset = overall_best_feature = None

# Function to extract the relevant part of the filename for the title.
# Defined once, ahead of the loop, instead of being re-created for every feature.
def extract_relevant_part(filename):
    """Return the third underscore-separated token of *filename*.

    The token is returned unmodified (no extension stripping). When the name
    splits into three or fewer tokens the string "Unknown" is returned.
    """
    parts = filename.split('_')
    if len(parts) > 3:  # Ensure there are enough parts to extract
        return parts[2]  # Adjust index based on your filename format
    return "Unknown"  # Fallback if format is not as expected


# Loop through each feature folder
for feature in features:
    print(f"Processing feature: {feature}")

    # Directory containing the output CSV files for the current feature
    output_csv_directory = os.path.join(base_directory, feature)

    # Collect the CSV files in the directory. glob returns them in a
    # filesystem-dependent order, so sort to make tie-breaking between equal
    # MSE values reproducible from run to run.
    csv_files = sorted(glob.glob(os.path.join(output_csv_directory, '*.csv')))

    # Per-dataset MSE tables: {dataset label: {model name: MSE}}
    mse_dict = {}

    # Variables to track the best model and its MSE for the current feature
    best_mse = float('inf')
    best_model = None
    best_dataset = None

    # Read each CSV file and compute the MSE by model
    for csv_file in csv_files:
        data = pd.read_csv(csv_file)

        # Label of the file being processed right now; used both as the
        # mse_dict key and as the reported dataset if this file wins.
        current_dataset = extract_relevant_part(os.path.basename(csv_file))

        file_mse = {}
        for model in data['Model'].unique():
            model_data = data[data['Model'] == model]
            actual = model_data['Actual']
            predicted = model_data['Predicted']

            # Clip predicted values to be within the range [0, 10]
            predicted = predicted.clip(lower=0, upper=10)

            mse = mean_squared_error(actual, predicted)
            file_mse[model] = mse

            # Update the best model if current MSE is strictly lower
            if mse < best_mse:
                best_mse = mse
                best_model = model
                best_dataset = current_dataset

        # Key by the current file's dataset, not by the best dataset so far:
        # using best_dataset here filed every table under the leader's label
        # (or under None), overwriting earlier entries.
        # NOTE(review): files whose names yield the same label (e.g. several
        # "Unknown") still share one key - confirm labels are unique per folder.
        mse_dict[current_dataset] = file_mse

    # Check if the best model for the current feature is better than the overall best model
    if best_mse < overall_best_mse:
        overall_best_mse = best_mse
        overall_best_model = best_model
        overall_best_dataset = best_dataset
        overall_best_feature = feature

    # Print the best model and its MSE for the current feature
    print(f"The best model for feature '{feature}' is '{best_model}' with an MSE of {best_mse:.2f} on the dataset '{best_dataset}'.")
    print(f"Finished processing feature: {feature}\n")

# Report the single lowest-MSE result found over every feature folder
print(
    f"\nThe overall best model across all features is '{overall_best_model}' "
    f"with an MSE of {overall_best_mse:.2f} "
    f"on the dataset '{overall_best_dataset}' "
    f"from the feature '{overall_best_feature}'."
)


Processing feature: NR
The best model for feature 'NR' is 'AdaBoostRegressor' with an MSE of 0.15 on the dataset 'interpolated.csv'.
Finished processing feature: NR

Processing feature: PR
The best model for feature 'PR' is 'AdaBoostRegressor' with an MSE of 0.13 on the dataset 'mean'.
Finished processing feature: PR

Processing feature: SR
The best model for feature 'SR' is 'MLPRegressor' with an MSE of 0.31 on the dataset 'bfill'.
Finished processing feature: SR

Processing feature: SFST
The best model for feature 'SFST' is 'AdaBoostRegressor' with an MSE of 0.22 on the dataset 'interpolated.csv'.
Finished processing feature: SFST

Processing feature: WS
The best model for feature 'WS' is 'AdaBoostRegressor' with an MSE of 0.27 on the dataset 'bfill'.
Finished processing feature: WS

Processing feature: NR_Benefit
The best model for feature 'NR_Benefit' is 'MLPRegressor' with an MSE of 0.29 on the dataset 'ffill'.
Finished processing feature: NR_Benefit

Processing feature: PR_Benefi