In [8]:
import os
import pandas as pd

# Folder path
folder_path = "/home/metehan/Visual_Studio_Projects/code-gen/evaluation_files"

# Create a global 'metrics' folder in the current local directory
global_metrics_folder = "metrics"
os.makedirs(global_metrics_folder, exist_ok=True)

# Columns every evaluation CSV must provide; a file lacking any of them is
# skipped with a message instead of being processed.
REQUIRED_COLUMNS = (
    'numRefactoredCorrect', 'numOptimizedCorrect',
    'total', 'runtimeOriginal', 'runtimeRefactored',
    'runtimeOptimized', 'eslint_count_original',
    'eslint_count_refactored', 'closure_count_original',
    'closure_count_refactored',
)


def _safe_ratio(numerator, denominator):
    """Divide two Series element-wise, yielding NaN where the denominator is 0.

    Plain pandas division returns +/-inf for ``x / 0``, and a single inf row
    turns the mean of the whole column into inf. Masking zero denominators to
    NaN lets ``Series.mean()`` (which skips NaN by default) average only the
    rows for which the ratio is actually defined.
    """
    return numerator / denominator.where(denominator != 0)


def compute_file_metrics(df):
    """Compute the per-file summary metrics for one evaluation DataFrame.

    Args:
        df: DataFrame containing every column listed in ``REQUIRED_COLUMNS``.
            It is read only; no columns are added to it.

    Returns:
        A dict mapping each output column name to the mean of the matching
        per-row metric. Rows whose ratio is undefined (zero denominator) are
        excluded from that mean; if every row is undefined the value is NaN.
    """
    # 1. Correctness (accuracy scores): fraction of correct cases per row.
    accuracy_refactored = _safe_ratio(df['numRefactoredCorrect'], df['total'])
    accuracy_optimized = _safe_ratio(df['numOptimizedCorrect'], df['total'])

    # 2. Runtime improvement: original runtime divided by the new runtime,
    #    so values above 1 mean the new version ran faster.
    runtime_refactored = _safe_ratio(df['runtimeOriginal'], df['runtimeRefactored'])
    runtime_optimized = _safe_ratio(df['runtimeOriginal'], df['runtimeOptimized'])

    # 3. Linter issue change, as a percentage of the original count
    #    (negative means fewer issues after refactoring).
    eslint_change = _safe_ratio(
        df['eslint_count_refactored'] - df['eslint_count_original'],
        df['eslint_count_original'],
    ) * 100
    closure_change = _safe_ratio(
        df['closure_count_refactored'] - df['closure_count_original'],
        df['closure_count_original'],
    ) * 100

    return {
        "Average Accuracy (Refactored)": accuracy_refactored.mean(),
        "Average Accuracy (Optimized)": accuracy_optimized.mean(),
        "Average Runtime Improvement (Refactored)": runtime_refactored.mean(),
        "Average Runtime Improvement (Optimized)": runtime_optimized.mean(),
        "Average ESLint Change Percentage": eslint_change.mean(),
        "Average Closure Change Percentage": closure_change.mean(),
    }


def metrics_filename_for(subfolder_name):
    """Return the output CSV file name for a subfolder's relative path.

    Path separators are flattened to underscores. The top-level folder, whose
    relative path is ``"."``, is written as ``metrics_root.csv`` rather than
    the malformed ``metrics_..csv``.
    """
    label = "root" if subfolder_name == os.curdir else subfolder_name.replace(os.sep, '_')
    return f"metrics_{label}.csv"


# Walk through all files in the folder and subfolders
for root, dirs, files in os.walk(folder_path):
    # Sorting in place makes os.walk descend in a stable order, so the
    # generated CSVs are reproducible between runs and machines.
    dirs.sort()

    # Collect results for the current subfolder
    results = []
    subfolder_name = os.path.relpath(root, folder_path)  # Get the subfolder name (relative path)

    for file in sorted(files):
        if not file.endswith(".csv"):
            continue

        file_path = os.path.join(root, file)
        file_name = os.path.relpath(file_path, folder_path)  # Get relative path for unique name

        try:
            # Read the file
            df = pd.read_csv(file_path)

            # Ensure required columns exist
            missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
            if missing_columns:
                print(f"Skipping file {file_path}: Missing columns {missing_columns}")
                continue

            # Append results for this file
            results.append({"File": file_name, **compute_file_metrics(df)})

        except Exception as e:
            # Deliberate per-file boundary: one unreadable or malformed CSV is
            # reported and skipped so the remaining files are still processed.
            print(f"Error processing file {file_path}: {e}")

    # If there are results, save them to a CSV in the global 'metrics' folder
    if results:
        output_csv = os.path.join(global_metrics_folder, metrics_filename_for(subfolder_name))
        results_df = pd.DataFrame(results)
        results_df.to_csv(output_csv, index=False)
        print(f"Metrics saved for subfolder '{subfolder_name}' to {output_csv}")


Metrics saved for subfolder 'gemini_results_temp_0' to ./metrics/metrics_gemini_results_temp_0.csv
Metrics saved for subfolder 'result_gemini/results_gemini_0.2' to ./metrics/metrics_result_gemini_results_gemini_0.2.csv
Metrics saved for subfolder 'result_gemini/results_gemini_0' to ./metrics/metrics_result_gemini_results_gemini_0.csv
Metrics saved for subfolder 'gemini_results_temp_0.2' to ./metrics/metrics_gemini_results_temp_0.2.csv
Metrics saved for subfolder 'openai_results_temp_0' to ./metrics/metrics_openai_results_temp_0.csv
