In [8]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Base path setup
# base_path is the root of the evaluation CSVs; output_base_path receives the charts.
base_path = "/home/metehan/Visual_Studio_Projects/code-gen/evaluation_files"
output_base_path = "/home/metehan/Visual_Studio_Projects/code-gen/difficulty_level_comparisons"

# Folder structure: result folder -> model sub-folders (an empty list means the
# CSVs sit directly inside the result folder).
folders = {
    "gemini_results_temp_0": ["gemini-1.5-flash", "gemini-1.5-pro"],
    "llama3_90B_results_temp_0": [],
    "mixtral_results_temp_0": [],
    "openai_results_temp_0": ["gpt-3.5-turbo", "gpt-4-turbo"]
}

# Ensure output folders exist: the result folder first, then one directory per
# model sub-folder. exist_ok=True keeps re-runs of the cell harmless.
for main_folder, sub_folders in folders.items():
    main_output_path = os.path.join(output_base_path, main_folder)
    target_dirs = [main_output_path] + [
        os.path.join(main_output_path, name) for name in sub_folders
    ]
    for target_dir in target_dirs:
        os.makedirs(target_dir, exist_ok=True)

# Function to create a stacked bar chart
def create_stacked_bar_chart(input_file, output_file):
    """Plot mean refactored/optimized accuracy per difficulty level and save it.

    Parameters
    ----------
    input_file : str
        Path of a CSV providing the columns ``level``, ``total``,
        ``numRefactoredCorrect`` and ``numOptimizedCorrect``.
    output_file : str
        Path the figure is written to.

    Raises
    ------
    KeyError
        If one of the columns listed above is missing.

    Notes
    -----
    Bars are ordered easy -> medium -> hard. Levels are matched
    case-insensitively; any other level value is appended after those three
    in its sorted position. (Assumes ``level`` holds strings such as
    "easy"/"medium"/"hard" -- confirm against the evaluation CSVs.)
    """
    df = pd.read_csv(input_file)

    # Per-row accuracy. NOTE(review): rows with total == 0 are not special-cased.
    scored = df.assign(
        refactored_accuracy=df['numRefactoredCorrect'] / df['total'],
        optimized_accuracy=df['numOptimizedCorrect'] / df['total'],
    )

    # Group by difficulty and calculate averages
    grouped = scored.groupby('level')[['refactored_accuracy', 'optimized_accuracy']].mean()

    # groupby returns its keys sorted (easy, hard, medium), so relabelling the
    # ticks as easy/medium/hard would put the wrong name under the medium and
    # hard bars. Reorder the rows instead and label the ticks from the data.
    by_name = {str(level).strip().lower(): level for level in grouped.index}
    known = [name for name in ('easy', 'medium', 'hard') if name in by_name]
    ordered = [by_name[name] for name in known]
    extras = [level for level in grouped.index if level not in ordered]
    grouped = grouped.loc[ordered + extras]
    tick_labels = known + [str(level) for level in extras]

    # Create a stacked bar chart
    ax = grouped.plot(kind='bar', stacked=True)
    ax.set_title("Average Accuracies by Difficulty Level")
    ax.set_ylabel("Accuracy")
    ax.set_xlabel("Difficulty")
    ax.legend(["Refactored", "Optimized"])

    # One tick per bar actually drawn (not a fixed three)
    ax.set_xticks(range(len(tick_labels)))
    ax.set_xticklabels(tick_labels)

    fig = ax.get_figure()
    fig.tight_layout()

    # Save the plot, then release the figure so loops over many files do not
    # accumulate open figures.
    fig.savefig(output_file)
    plt.close(fig)

# Process each folder and generate charts
for main_folder, sub_folders in folders.items():
    main_input_path = os.path.join(base_path, main_folder)
    main_output_path = os.path.join(output_base_path, main_folder)

    # Folders with model sub-directories are processed one sub-directory at a
    # time; folders without any hold their CSVs directly.
    if sub_folders:
        directory_pairs = [
            (os.path.join(main_input_path, name), os.path.join(main_output_path, name))
            for name in sub_folders
        ]
    else:
        directory_pairs = [(main_input_path, main_output_path)]

    for input_path, output_path in directory_pairs:
        for file_name in os.listdir(input_path):
            if not file_name.endswith(".csv"):
                continue
            # The chart keeps the CSV's base name, with a .png extension
            stem = os.path.splitext(file_name)[0]
            input_file = os.path.join(input_path, file_name)
            output_file = os.path.join(output_path, f"{stem}.png")
            create_stacked_bar_chart(input_file, output_file)

print("Graph generation completed!")

Graph generation completed!


In [25]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Base paths
# base_path: root of the evaluation CSV tree that is walked for input files.
# output_base_path: directory the per-prompt charts are written to; it is
# created up front, and exist_ok=True makes re-running the cell harmless.
# NOTE(review): both are absolute paths on one machine -- adjust before
# running this cell anywhere else.
base_path = "/home/metehan/Visual_Studio_Projects/code-gen/evaluation_files"
output_base_path = "/home/metehan/Visual_Studio_Projects/code-gen/prompt_stacked_bar_charts"
os.makedirs(output_base_path, exist_ok=True)

# List of prompt prefixes
# A CSV is assigned to a prompt when its file name starts with that prefix,
# and each prefix with at least one matching CSV yields one "<prefix>.png".
# Matching is a plain startswith test, so a prefix ending in "V1" would also
# match a file name that begins with e.g. "..._V10".
prompt_prefixes = [
    "PROMPT_CODEGEN_V1",
    "PROMPT_REFACTOR_V1",
    "PROMPT_REFACTOR_V2",
    "PROMPT_REFACTOR_V3",
    "PROMPT_REFACTOR_V4",
    "PROMPT_REFACTOR_V5"
]

# Function to traverse the file structure and find CSVs by prefix
def gather_csv_files_by_prefix(base_path, prefixes):
    """Collect the CSVs below ``base_path``, bucketed by file-name prefix.

    The directory tree is walked recursively. Every file ending in ".csv"
    is loaded with ``pd.read_csv`` once for each prefix its name starts with
    and appended to that prefix's list.

    Returns
    -------
    dict
        ``{prefix: [DataFrame, ...]}`` with an entry for every prefix, which
        is an empty list when nothing matched.
    """
    frames_by_prefix = {}
    for prefix in prefixes:
        frames_by_prefix[prefix] = []

    for directory, _subdirs, entries in os.walk(base_path):
        for entry in entries:
            if not entry.endswith(".csv"):
                continue
            matching = [prefix for prefix in prefixes if entry.startswith(prefix)]
            for prefix in matching:
                csv_path = os.path.join(directory, entry)
                frames_by_prefix[prefix].append(pd.read_csv(csv_path))

    return frames_by_prefix

# Function to create a stacked bar chart
def create_stacked_bar_chart(df, output_file):
    """Plot mean refactored/optimized accuracy per difficulty level and save it.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows providing the columns ``level``, ``total``,
        ``numRefactoredCorrect`` and ``numOptimizedCorrect``. The frame is
        not modified.
    output_file : str
        Path the figure is written to.

    Raises
    ------
    KeyError
        If one of the columns listed above is missing.

    Notes
    -----
    Bars are ordered easy -> medium -> hard. Levels are matched
    case-insensitively; any other level value is appended after those three
    in its sorted position. (Assumes ``level`` holds strings such as
    "easy"/"medium"/"hard" -- confirm against the evaluation CSVs.)
    """
    # Per-row accuracy, built on a copy so the caller's frame keeps its columns.
    # NOTE(review): rows with total == 0 are not special-cased.
    scored = df.assign(
        refactored_accuracy=df['numRefactoredCorrect'] / df['total'],
        optimized_accuracy=df['numOptimizedCorrect'] / df['total'],
    )

    # Group by difficulty and calculate averages
    grouped = scored.groupby('level')[['refactored_accuracy', 'optimized_accuracy']].mean()

    # groupby returns its keys sorted (easy, hard, medium), so relabelling the
    # ticks as easy/medium/hard would put the wrong name under the medium and
    # hard bars. Reorder the rows instead and label the ticks from the data.
    by_name = {str(level).strip().lower(): level for level in grouped.index}
    known = [name for name in ('easy', 'medium', 'hard') if name in by_name]
    ordered = [by_name[name] for name in known]
    extras = [level for level in grouped.index if level not in ordered]
    grouped = grouped.loc[ordered + extras]
    tick_labels = known + [str(level) for level in extras]

    # Create a stacked bar chart
    ax = grouped.plot(kind='bar', stacked=True)
    ax.set_title("Average Accuracies by Difficulty Level")
    ax.set_ylabel("Accuracy")
    ax.set_xlabel("Difficulty")
    ax.legend(["Refactored", "Optimized"])

    # One tick per bar actually drawn (not a fixed three)
    ax.set_xticks(range(len(tick_labels)))
    ax.set_xticklabels(tick_labels)

    fig = ax.get_figure()
    fig.tight_layout()

    # Save the plot, then release the figure so repeated calls do not
    # accumulate open figures.
    fig.savefig(output_file)
    plt.close(fig)

# Main execution
def main():
    """Write one combined chart per prompt prefix.

    The frames of every CSV matching a prefix are concatenated and charted
    into "<prefix>.png" under ``output_base_path``. Prefixes without any
    matching CSV are skipped.
    """
    frames_by_prefix = gather_csv_files_by_prefix(base_path, prompt_prefixes)

    for prefix, data_frames in frames_by_prefix.items():
        if not data_frames:
            continue  # nothing matched this prefix
        chart_path = os.path.join(output_base_path, f"{prefix}.png")
        create_stacked_bar_chart(pd.concat(data_frames, ignore_index=True), chart_path)

    print("Charts generated and saved in the prompt_stacked_bar_charts folder!")

# Run the script
if __name__ == "__main__":
    main()

Charts generated and saved in the prompt_stacked_bar_charts folder!


In [29]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Base paths
# base_path: root of the evaluation CSV tree; each sub-directory below it is
# treated as one model.
# output_base_path: directory the per-model charts are written to; it is
# created up front, and exist_ok=True makes re-running the cell harmless.
# NOTE(review): both are absolute paths on one machine -- adjust before
# running this cell anywhere else.
base_path = "/home/metehan/Visual_Studio_Projects/code-gen/evaluation_files"
output_base_path = "/home/metehan/Visual_Studio_Projects/code-gen/model_stacked_bar_charts"
os.makedirs(output_base_path, exist_ok=True)

# Function to traverse the file structure and gather CSVs by subfolder
def gather_csv_files_by_model(base_path):
    """Load the CSVs found in each sub-directory below ``base_path``.

    The tree is walked recursively. Every directory other than ``base_path``
    itself gets an entry keyed by its path relative to ``base_path``; the
    value lists the DataFrames of the ".csv" files located directly in that
    directory, and is empty when there are none.

    Returns
    -------
    dict
        ``{relative_dir: [DataFrame, ...]}``
    """
    frames_by_folder = {}
    for directory, _subdirs, entries in os.walk(base_path):
        relative_dir = os.path.relpath(directory, base_path)
        if relative_dir != ".":  # CSVs lying in the base directory itself are ignored
            frames_by_folder[relative_dir] = [
                pd.read_csv(os.path.join(directory, entry))
                for entry in entries
                if entry.endswith(".csv")
            ]
    return frames_by_folder

# Function to create a stacked bar chart
def create_stacked_bar_chart(df, output_file, model_name):
    """Plot mean refactored/optimized accuracy per difficulty level and save it.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows providing the columns ``level``, ``total``,
        ``numRefactoredCorrect`` and ``numOptimizedCorrect``. The frame is
        not modified.
    output_file : str
        Path the figure is written to.
    model_name : str
        Appended to the chart title in parentheses so the charts of
        different models can be told apart; an empty value leaves the title
        without a suffix.

    Raises
    ------
    KeyError
        If one of the columns listed above is missing.

    Notes
    -----
    Bars are ordered easy -> medium -> hard. Levels are matched
    case-insensitively; any other level value is appended after those three
    in its sorted position. (Assumes ``level`` holds strings such as
    "easy"/"medium"/"hard" -- confirm against the evaluation CSVs.)
    """
    # Per-row accuracy, built on a copy so the caller's frame keeps its columns.
    # NOTE(review): rows with total == 0 are not special-cased.
    scored = df.assign(
        refactored_accuracy=df['numRefactoredCorrect'] / df['total'],
        optimized_accuracy=df['numOptimizedCorrect'] / df['total'],
    )

    # Group by difficulty and calculate averages
    grouped = scored.groupby('level')[['refactored_accuracy', 'optimized_accuracy']].mean()

    # groupby returns its keys sorted (easy, hard, medium), so relabelling the
    # ticks as easy/medium/hard would put the wrong name under the medium and
    # hard bars. Reorder the rows instead and label the ticks from the data.
    by_name = {str(level).strip().lower(): level for level in grouped.index}
    known = [name for name in ('easy', 'medium', 'hard') if name in by_name]
    ordered = [by_name[name] for name in known]
    extras = [level for level in grouped.index if level not in ordered]
    grouped = grouped.loc[ordered + extras]
    tick_labels = known + [str(level) for level in extras]

    # model_name used to be accepted but ignored, leaving every model's chart
    # with the same title.
    title = "Average Accuracies by Difficulty Level"
    if model_name:
        title = f"{title} ({model_name})"

    # Create a stacked bar chart
    ax = grouped.plot(kind='bar', stacked=True)
    ax.set_title(title)
    ax.set_ylabel("Accuracy")
    ax.set_xlabel("Difficulty")
    ax.legend(["Refactored", "Optimized"])

    # One tick per bar actually drawn (not a fixed three)
    ax.set_xticks(range(len(tick_labels)))
    ax.set_xticklabels(tick_labels)

    fig = ax.get_figure()
    fig.tight_layout()

    # Save the plot, then release the figure so repeated calls do not
    # accumulate open figures.
    fig.savefig(output_file)
    plt.close(fig)

# Main execution
def main():
    """Write one combined chart per model sub-directory.

    The frames of every CSV found in a sub-directory are concatenated and
    charted into "<model_name>.png" under ``output_base_path``, where
    ``model_name`` is the sub-directory's relative path with "/" replaced by
    "_". Sub-directories without CSVs are skipped.
    """
    frames_by_model = gather_csv_files_by_model(base_path)

    for model, data_frames in frames_by_model.items():
        if not data_frames:
            continue  # directory holds no CSV of its own
        # Replace slashes so a nested path becomes a flat, valid file name
        model_name = model.replace("/", "_")
        chart_path = os.path.join(output_base_path, f"{model_name}.png")
        merged = pd.concat(data_frames, ignore_index=True)
        create_stacked_bar_chart(merged, chart_path, model_name)

    print("Charts generated and saved in the model_stacked_bar_charts folder!")

# Run the script
if __name__ == "__main__":
    main()

Charts generated and saved in the model_stacked_bar_charts folder!
