In [1]:
import pandas as pd

# Map each hyper-parameter tuning strategy to the CSV file that holds its
# combined XGBoost evaluation results. The keys double as method labels.
file_paths = dict(
    randomsearch='hasil/uji4/combined_xgboost_evaluations_with_group_randomsearch.csv',
    gridsearch='hasil/uji4/combined_xgboost_evaluations_with_group_gridsearch.csv',
    notuning='hasil/uji4/combined_xgboost_evaluations_with_group_notuning.csv',
)


In [5]:
# Evaluation metrics considered when picking the best rows.
# R2 is maximised; every other metric is an error and is minimised.
metrics = ['MSE', 'RMSE', 'MAE', 'R2']


def _best_rows_per_group(data, metrics, group_col='Group'):
    """Collect, for every group, the rows holding the best value of each metric.

    Parameters
    ----------
    data : pandas.DataFrame
        Evaluation table containing ``group_col`` and any subset of ``metrics``.
    metrics : list of str
        Metric column names; ``'R2'`` is maximised, all others are minimised.
        Metrics that are not columns of ``data`` are ignored.
    group_col : str
        Column used to partition the rows.

    Returns
    -------
    pandas.DataFrame
        The winning rows with exact duplicates removed and a fresh 0..n-1
        index. Empty (same columns as ``data``) when no metric could be used.
    """
    winners = []
    for metric in metrics:
        if metric not in data.columns:
            continue
        # Rows without a value for this metric cannot win. Dropping them first
        # keeps an all-NaN group from producing a NaN index label (or an
        # exception in newer pandas) that would break the .loc lookup below.
        scored = data.dropna(subset=[metric])
        if scored.empty:
            continue
        per_group = scored.groupby(group_col)[metric]
        best_labels = per_group.idxmax() if metric == 'R2' else per_group.idxmin()
        winners.append(data.loc[best_labels])

    if not winners:
        return data.iloc[0:0].copy()
    # The same row may win several metrics; keep it only once.
    return pd.concat(winners).drop_duplicates().reset_index(drop=True)


# Best rows per tuning method, keyed by the method label from file_paths
summary_data = {}

for method_group, file_path in file_paths.items():
    # Best effort: an unreadable or malformed file skips this method only.
    # Only the read is guarded so genuine programming errors are not hidden.
    try:
        data = pd.read_csv(file_path)
    except (OSError, ValueError) as e:
        # ValueError covers pandas' EmptyDataError/ParserError and decode errors.
        print(f"Error processing {file_path}: {e}")
        continue

    if 'Group' not in data.columns:
        print(f"Error processing {file_path}: missing 'Group' column")
        continue

    best_evaluations_df = _best_rows_per_group(data, metrics)
    if best_evaluations_df.empty:
        print(f"Error processing {file_path}: no usable metric values found")
        continue

    # Tag every row with the tuning method it came from
    best_evaluations_df['MethodGroup'] = method_group
    summary_data[method_group] = best_evaluations_df

if not summary_data:
    # Fail with an actionable message instead of pandas' generic
    # "No objects to concatenate".
    raise ValueError(
        "No evaluation file could be processed; check the paths in file_paths "
        "and the messages printed above."
    )

# Combine all summaries into a single DataFrame
summary_df = pd.concat(list(summary_data.values()), ignore_index=True)

def _min_max_normalize(values, higher_is_better):
    """Rescale a metric column to [0, 1] so that 1 is always the best observed value.

    Dividing by the column maximum (the previous approach) does not give a 0-1
    scale: the best error metric never reaches 1, negative R2 values fall
    below 0, and the ranking inverts when the largest R2 is not positive.
    Min-max scaling avoids all three.

    Parameters
    ----------
    values : pandas.Series
        Raw metric values. NaN entries stay NaN.
    higher_is_better : bool
        True for metrics to maximise (R2), False for error metrics.

    Returns
    -------
    pandas.Series
        Values in [0, 1], aligned to ``values.index``.
    """
    lowest, highest = values.min(), values.max()
    spread = highest - lowest
    if spread == 0:
        # Every candidate ties on this metric: treat them all as "best"
        # rather than dividing by zero.
        return values.where(values.isna(), 1.0)
    if higher_is_better:
        return (values - lowest) / spread
    return (highest - values) / spread


# Normalize the metrics to a 0-1 scale (1 = best candidate, 0 = worst)
summary_df['Normalized_R2'] = _min_max_normalize(summary_df['R2'], higher_is_better=True)
summary_df['Normalized_MAE'] = _min_max_normalize(summary_df['MAE'], higher_is_better=False)
summary_df['Normalized_MSE'] = _min_max_normalize(summary_df['MSE'], higher_is_better=False)
summary_df['Normalized_RMSE'] = _min_max_normalize(summary_df['RMSE'], higher_is_better=False)

# Relative importance of each metric in the combined score; the weights sum to 1
weight_R2 = 0.4
weight_MAE = 0.3
weight_MSE = 0.2
weight_RMSE = 0.1

# Weighted sum of the normalized metrics; higher means better overall
summary_df['Score'] = (
    (weight_R2 * summary_df['Normalized_R2']) +
    (weight_MAE * summary_df['Normalized_MAE']) +
    (weight_MSE * summary_df['Normalized_MSE']) +
    (weight_RMSE * summary_df['Normalized_RMSE'])
)
# Rank all candidates from the highest to the lowest weighted score,
# renumbering the rows so that position 0 is the winner.
sorted_summary_df = summary_df.sort_values(
    by='Score',
    ascending=False,
    ignore_index=True,
)

# Show the top-ranked row for quick reference
print("\nBest overall method and evaluation based on highest score:")
print(sorted_summary_df.iloc[0])

# Persist the ranked table
sorted_summary_file_path = 'hasil/uji4/0909_1_1_improved_best_evaluations_summary_weighted_scoring_sorted.csv'
sorted_summary_df.to_csv(sorted_summary_file_path, index=False)

# Look the winner up again, this time in the unsorted table
best_row_label = summary_df['Score'].idxmax()
best_overall = summary_df.loc[best_row_label]

print("\nBest overall method and evaluation based on combined weighted criteria:")
print(best_overall)

# Persist the unsorted table as well
summary_file_path = 'hasil/uji4/0909_1_2_improved_best_evaluations_summary_weighted_scoring.csv'
summary_df.to_csv(summary_file_path, index=False)

print(f"Summary of best evaluations saved as {summary_file_path}")



Best overall method and evaluation based on highest score:
Test Size                                                        0.2
Method                           No Cross Validation (Random Search)
Best Params        {'subsample': 0.8, 'min_child_weight': 2, 'max...
Start Time                                       2024-06-10 22:49:52
End Time                                         2024-06-10 22:50:09
Duration (s)                                                17.56033
MAE                                                         2.262814
MSE                                                        13.040409
RMSE                                                        3.611151
R2                                                          0.799246
Group                                      CV_RandomSearch_if_by_lof
MethodGroup                                             randomsearch
Normalized_R2                                                    1.0
Normalized_MAE                             