# Plot Results for Model Evaluation

# In [None]:

import matplotlib.pyplot as plt
import pandas as pd
import os

# Load evaluation results
def load_results(file_path):
    """Read an evaluation-results CSV file into a pandas DataFrame.

    Args:
        file_path: Location of the CSV file; passed unchanged to
            ``pandas.read_csv``.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with default options.
    """
    results_frame = pd.read_csv(file_path)
    return results_frame

# Plot accuracy vs input digit length
def plot_results(results, model_name):
    """Plot accuracy against input digit length for each evaluation mode.

    Draws one line per mode (direct, scratchpad, RFFT) on a single 10x6
    figure and displays it with ``plt.show()``.

    Args:
        results: DataFrame with a ``digit_length`` column and one accuracy
            column per mode: ``direct``, ``scratchpad`` and ``rfft``.
        model_name: Model name shown in the legend labels and the title.

    Raises:
        KeyError: If any required column is missing from ``results``. The
            check runs before any figure is created, so a bad input does not
            leave an empty figure open.
    """
    # Column name -> legend text. An explicit mapping keeps the acronym
    # upper-case ("RFFT") so the legend agrees with the title;
    # str.capitalize() would render it as "Rfft".
    mode_labels = {"direct": "Direct", "scratchpad": "Scratchpad", "rfft": "RFFT"}

    required_columns = ["digit_length", *mode_labels]
    missing = [column for column in required_columns if column not in results]
    if missing:
        raise KeyError(
            f"results is missing required column(s): {', '.join(missing)}"
        )

    # A line plot connects points in row order; sort by the x values so an
    # unsorted CSV does not produce a zig-zag. mergesort is stable, so rows
    # with equal digit_length keep their original relative order.
    ordered = results.sort_values("digit_length", kind="mergesort")

    plt.figure(figsize=(10, 6))

    for column, label in mode_labels.items():
        plt.plot(ordered["digit_length"], ordered[column], label=f"{model_name}: {label}")

    plt.xlabel("Input Digit Length")
    plt.ylabel("Model Accuracy")
    plt.title(f"Re-Implementation Results for {model_name} Model (Direct, Scratchpad, RFFT)")
    plt.legend()
    plt.grid(True)
    plt.show()

# Main function
def main():
    """Plot the saved evaluation results of each model whose CSV file exists.

    Handles GPT-3.5 first and Llama-7B second. For each model, the results
    file is looked up by its relative path; if present it is loaded with
    ``load_results`` and drawn with ``plot_results``, otherwise an error line
    is printed and processing moves on to the next model.
    """
    # (results file, display name) pairs, processed in this order.
    result_files = (
        ("gpt3.5_results.csv", "GPT-3.5"),
        ("llama_results.csv", "Llama-7B"),
    )

    for csv_path, model_label in result_files:
        if not os.path.exists(csv_path):
            print(f"Error: {model_label} results file not found.")
            continue
        plot_results(load_results(csv_path), model_label)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
