# In [4]:
import pandas as pd

def calculate_accuracy(csv1, csv2, question_col="question", answer_col="Answer") -> float:
    """
    Compare the answers of two CSV files for the questions they share.

    Both files are loaded, reduced to the question and answer columns,
    inner-joined on the question column, and the two answers on every joined
    row are compared with ``==`` (exact match). A short summary is printed.

    Notes:
      * The join is a plain inner merge, so a question that appears several
        times in either file produces one joined row per pairing.
      * Missing answers (NaN) never compare equal, so they count as
        mismatches.

    :param csv1: Path to the first CSV file (anything ``pd.read_csv`` accepts)
    :param csv2: Path to the second CSV file (anything ``pd.read_csv`` accepts)
    :param question_col: Name of the column used to align rows of both files
    :param answer_col: Name of the column whose values are compared
    :return: Accuracy as a percentage (always a ``float``; ``0.0`` when no
        question is present in both files)
    :raises KeyError: If either file lacks one of the two required columns
    """
    required = [question_col, answer_col]

    # Load both CSVs and keep only the columns that take part in the
    # comparison. Restricting the frames guarantees that the answer column
    # exists on both sides, so the merge always produces the suffixed
    # "<answer_col>_1" / "<answer_col>_2" pair read below.
    frames = []
    for label, source in (("csv1", csv1), ("csv2", csv2)):
        frame = pd.read_csv(source)
        missing = [col for col in required if col not in frame.columns]
        if missing:
            raise KeyError(f"{label} is missing required column(s): {missing}")
        frames.append(frame[required])

    # Align answers by question; only questions found in both files survive.
    merged_df = pd.merge(
        frames[0],
        frames[1],
        on=question_col,
        how="inner",
        suffixes=("_1", "_2"),
    )

    # Count correct answers (exact match)
    is_match = merged_df[f"{answer_col}_1"] == merged_df[f"{answer_col}_2"]
    correct_matches = int(is_match.sum())

    # Total matched questions
    total_questions = len(merged_df)

    # Guard against division by zero when the files share no question, and
    # return a plain float on every path.
    if total_questions > 0:
        accuracy = float((correct_matches / total_questions) * 100)
    else:
        accuracy = 0.0

    print(f"Matched Questions: {total_questions}")
    print(f"Correct Matches: {correct_matches}")
    print(f"Accuracy: {accuracy:.2f}%")

    return accuracy

# Example usage
# Example run. Judging by the paths, csv1 holds the Gemini baseline results
# and csv2 the processed MATH dataset with answers; point both at your own
# files before running.
csv1_path = "/Users/tanalpha_aditya/Desktop/ALM/Project/SPARK-Maths/Baseline/GPT4->Gemini/Results/MATH/output_dataset_with_answers_gemini.csv"
csv2_path = "/Users/tanalpha_aditya/Desktop/ALM/Project/SPARK-Maths/Dataset/MATH/Processed_MATH_with_answer/output_dataset_with_answers2_final.csv"

calculate_accuracy(csv1=csv1_path, csv2=csv2_path)


# Output of the cell above:
# Matched Questions: 1769
# Correct Matches: 596
# Accuracy: 33.69%
#
# Returned value: np.float64(33.69135104578858)