In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import os 

def perform_t_test_and_save(model_name, sae_path, output_csv_path):
    """Run one-sided paired t-tests on a results file and append them to a CSV.

    The input file is read as a semicolon-separated CSV that must contain the
    columns ``us_score``, ``uk_score`` and ``ukGT_usGT_score``. Two paired
    t-tests (``scipy.stats.ttest_rel`` with ``alternative='greater'``) are run:

    * ``uk_score`` vs. ``ukGT_usGT_score``  -> row labelled ``GroundTruth_UK``
    * ``us_score`` vs. ``ukGT_usGT_score``  -> row labelled ``GroundTruth_US``

    i.e. each test asks whether the model score is, on average, greater than
    the ground-truth score on the same rows.

    Parameters
    ----------
    model_name : str
        Label written to the ``Model`` column of the output.
    sae_path : str or path-like
        Path to the semicolon-separated input results file.
    output_csv_path : str or path-like
        CSV file the two result rows are appended to. A header row is written
        only when the file does not exist yet or is empty.

    Returns
    -------
    None
        Results are written to ``output_csv_path`` and a confirmation line is
        printed.
    """
    score_columns = ['us_score', 'ukGT_usGT_score', 'uk_score']

    # Load the per-item scores.
    scores = pd.read_csv(sae_path, sep=";")

    # Coerce every score column to numeric. A single stray token in any of
    # them would otherwise leave an object-dtype column and make ttest_rel
    # fail; coerced values become NaN and are removed by dropna() below.
    for column in score_columns:
        scores[column] = pd.to_numeric(scores[column], errors='coerce')

    # Keep only rows whose ground-truth score is below 0.9 (NaN compares
    # False, so unparsable ground-truth values are dropped here as well).
    # NOTE(review): the rationale for the 0.9 cutoff is not documented here.
    scores = scores[scores['ukGT_usGT_score'] < 0.9]

    # Drops rows with a missing value in *any* column, not just the scores;
    # kept as-is so existing results do not change.
    scores = scores.dropna()

    # Paired samples: the i-th entry of each array comes from the same row.
    sample_us = scores['us_score'].to_numpy()
    sample_gt = scores['ukGT_usGT_score'].to_numpy()
    sample_uk = scores['uk_score'].to_numpy()

    # One-sided paired t-tests: is the model score greater than ground truth?
    result_gt_uk = stats.ttest_rel(sample_uk, sample_gt, alternative='greater')
    result_gt_us = stats.ttest_rel(sample_us, sample_gt, alternative='greater')

    # One output row per test.
    results = pd.DataFrame(
        [
            [model_name, "GroundTruth_UK", result_gt_uk.statistic, result_gt_uk.pvalue],
            [model_name, "GroundTruth_US", result_gt_us.statistic, result_gt_us.pvalue],
        ],
        columns=["Model", "Metric", "Statistic", "P-Value"],
    )

    # Write the header when starting a new file. An existing but empty file
    # also needs one, otherwise the CSV would contain data rows only.
    needs_header = (
        not os.path.isfile(output_csv_path)
        or os.path.getsize(output_csv_path) == 0
    )

    # Append so that successive calls accumulate rows in the same file.
    results.to_csv(output_csv_path, mode='a', index=False, header=needs_header)

    print(f"Results saved to {output_csv_path}")


In [2]:
# Directory holding the per-model result files and the CSV that collects
# the t-test output for all models.
# NOTE(review): RESULTS_DIR is an absolute, machine-specific path kept from
# the original; point it at your local checkout before running.
RESULTS_DIR = "/Users/nils/AutoPersonalizationBench/case1_CulturalSensitivity/results"
OUTPUT_CSV = "paired_t_test_results_case1.csv"

# (model label, result file name) pairs, processed in this order.
MODEL_RESULT_FILES = [
    ("Llama13B", "results_llama13B.csv"),
    ("Llama7B", "results_llama7B.csv"),
    ("Vicuna13B", "results_vicuna13B.csv"),
    ("Vicuna7B", "results_vicuna7B.csv"),
    ("Alpaca", "results_alpaca.csv"),
    ("GPT-4", "gpt_results.csv"),
    ("Gemini", "gemini_results.csv"),
    ("Llama-70B", "llama_results.csv"),
]

# perform_t_test_and_save appends to OUTPUT_CSV, so re-running this cell
# adds the same rows again; remove the file first for a clean run.
for model_name, results_file in MODEL_RESULT_FILES:
    perform_t_test_and_save(
        model_name,
        f"{RESULTS_DIR}/{results_file}",
        OUTPUT_CSV,
    )

Results saved to paired_t_test_results_case1.csv
Results saved to paired_t_test_results_case1.csv
Results saved to paired_t_test_results_case1.csv
Results saved to paired_t_test_results_case1.csv
Results saved to paired_t_test_results_case1.csv
Results saved to paired_t_test_results_case1.csv
Results saved to paired_t_test_results_case1.csv
Results saved to paired_t_test_results_case1.csv
