In [7]:
import pandas as pd
import glob
import numpy as np
from sklearn.metrics import mean_squared_error
import os

# Configuration for normalization
# Per-feature value range. normalize_values() maps "min" to 0 and "max" to 10
# and clips anything that falls outside the range.
config = {
    feature: {"min": lower, "max": upper}
    for feature, lower, upper in (
        # (feature, min, max)
        ("WS", 1.58, 8.61),
        ("PR", 2.07, 10.0),
        ("NR", 4.10, 10.0),
        ("SR", 2.29, 10.0),
        ("SFST", 0.0, 7.71),
        ("WS_Benefit", 0.08, 10.0),
        ("PR_Benefit", 0.49, 10.0),
        ("NR_Benefit", 0.71, 10.0),
        ("SR_Benefit", 0.49, 8.79),
        ("SFST_Benefit", 0.0, 7.19),
    )
}
# Function to normalize values based on the feature
def normalize_values(values, feature):
    """Rescale ``values`` from a feature's configured range onto 0-10.

    The feature's ``"min"`` maps to 0 and its ``"max"`` maps to 10; results
    that fall outside that interval are clipped to it.

    Args:
        values: Numeric data supporting arithmetic with scalars (the caller
            in this file passes a NumPy array).
        feature: Key into the module-level ``config`` dict.

    Returns:
        The rescaled values, limited to the closed interval [0, 10].

    Raises:
        KeyError: If ``feature`` has no entry in ``config``.
    """
    bounds = config[feature]
    lower = bounds["min"]
    span = bounds["max"] - lower
    scaled = (values - lower) / span * 10
    return np.clip(scaled, 0, 10)

# List of possible feature names
# Every base feature first, then the "_Benefit" variant of each, in the same order.
feature_list = [
    base + suffix
    for suffix in ("", "_Benefit")
    for base in ("WS", "PR", "NR", "SR", "SFST")
]


# Function to find the correct feature name in the CSV file path
def extract_feature_name(file_name, features=None):
    """Return the feature name embedded in a results-file path.

    Candidates are tried longest first. Several names are prefixes of others
    ("WS" is contained in "WS_Benefit"), so checking them in list order would
    report "WS" for a "..._WS_Benefit_..." file and the wrong normalization
    range would then be applied.

    Args:
        file_name: Path or file name to inspect.
        features: Optional iterable of candidate names; defaults to the
            module-level ``feature_list``.

    Returns:
        The longest candidate found in the file's base name, or, failing
        that, anywhere in ``file_name``.

    Raises:
        ValueError: If no candidate occurs in ``file_name``.
    """
    candidates = feature_list if features is None else features
    # sorted() is stable, so equally long names keep their original priority.
    longest_first = sorted(candidates, key=len, reverse=True)

    # Look at the file's own name first so that directory names cannot
    # shadow it; fall back to the whole path for anything matched before.
    for text in (os.path.basename(file_name), file_name):
        for feature in longest_first:
            if feature in text:
                return feature
    raise ValueError("No matching feature found in the file name.")

# Function to process a single CSV file, normalize the results, and save the new file
def process_and_normalize_file(csv_file):
    """Normalize one results CSV, recompute its per-row MSE, and save a copy.

    The feature is inferred from ``csv_file`` via ``extract_feature_name``.
    Every cell of the 'Actual' and 'Predicted' columns is parsed as a
    comma-separated list of floats, rescaled with ``normalize_values`` and
    written back as a comma-separated string. The 'MSE' column is then
    (re)written with the mean squared error between the normalized values
    of each row.

    Args:
        csv_file: Path to a CSV file containing 'Actual' and 'Predicted'
            columns.

    Raises:
        ValueError: Propagated from ``extract_feature_name`` when no known
            feature name occurs in ``csv_file``.

    Side effects:
        Writes ``<stem>_post<ext>`` next to the input (for example
        ``..._non_norm.csv`` becomes ``..._non_norm_post.csv``) and prints
        the path that was written.
    """
    feature = extract_feature_name(csv_file)
    df = pd.read_csv(csv_file)

    def _normalize_cell(cell):
        # Parse one "v1,v2,..." cell and rescale it for this file's feature.
        raw = np.array([float(item) for item in cell.split(',')])
        return normalize_values(raw, feature)

    # Keep the numeric arrays so the MSE is computed from them directly
    # instead of re-parsing the strings stored in the frame.
    actual = [_normalize_cell(cell) for cell in df['Actual']]
    predicted = [_normalize_cell(cell) for cell in df['Predicted']]

    df['Actual'] = [','.join(map(str, row)) for row in actual]
    df['Predicted'] = [','.join(map(str, row)) for row in predicted]
    df['MSE'] = [mean_squared_error(a, p) for a, p in zip(actual, predicted)]

    # Derive the output name from the stem so it can never equal the input
    # path; a plain str.replace() is a no-op for unexpected file names and
    # would overwrite the source file.
    stem, ext = os.path.splitext(csv_file)
    new_file_path = f"{stem}_post{ext}"
    df.to_csv(new_file_path, index=False)
    print(f"Saved normalized and recalculated file: {new_file_path}")

# Process all 'test_results_***_non_norm.csv' files
# glob.glob returns matches in arbitrary order; sorting keeps the processing
# (and the printed log) identical from run to run.
csv_files = sorted(glob.glob('./test_results_*_non_norm.csv'))  # Adjust the path to match your directory structure

if not csv_files:
    # Make an empty match visible instead of finishing silently.
    print("No files matching './test_results_*_non_norm.csv' were found.")

for csv_file in csv_files:
    process_and_normalize_file(csv_file)


Saved normalized and recalculated file: .\test_results_WS_non_norm_post.csv
