In [17]:
import pandas as pd
import numpy as np
from scipy import stats
import os 
def transform_column_to_numpy_array(dataframe, column_name):
    """Flatten a column of ';'-separated integer lists into one 1-D integer array.

    Every cell of ``dataframe[column_name]`` is split on ';', each non-blank
    token is converted with ``int`` and the values of all rows are
    concatenated in row order.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame that holds the column to flatten.
    column_name : str
        Name of the column whose cells look like ``"3;0;7"``.

    Returns
    -------
    numpy.ndarray
        1-D integer array with one entry per parsed token. The array is
        integer-typed even when no value was found.

    Raises
    ------
    KeyError
        If ``column_name`` is not a column of ``dataframe``.
    ValueError
        If a non-blank token is not an integer literal. This includes
        missing cells, which stringify to ``'nan'``.
    """
    all_values = []
    for cell in dataframe[column_name]:
        # str() lets a column that pandas parsed as plain integers (every row
        # holding a single value) go through the same path as string cells.
        for token in str(cell).split(';'):
            token = token.strip()
            # A trailing or doubled ';' yields an empty token that carries no
            # value; skip it instead of failing on int('').
            if token:
                all_values.append(int(token))

    # Explicit dtype: np.array([]) would otherwise come back as float64.
    return np.array(all_values, dtype=int)


def perform_t_test_and_save(model_name, sae_path, output_csv_path):
    """Run a paired t-test of CF vs. WF persuasion counts and append it to a CSV.

    The results file at ``sae_path`` is split into the rows whose ``case`` is
    ``WF_FD``/``WF_SD`` and those whose ``case`` is ``CF_FD``/``CF_SD``. The
    ``persuasion_counts`` column of each subset is flattened into one sample,
    and ``scipy.stats.ttest_rel(sample_cf, sample_wf)`` is evaluated with its
    default settings. One summary row is appended to ``output_csv_path``.

    Parameters
    ----------
    model_name : str
        Label written to the "Model" column of the output row.
    sae_path : str
        Path of the per-model results CSV. It must provide the columns
        ``case``, ``dataset`` and ``persuasion_counts``.
    output_csv_path : str
        CSV file the summary row is appended to. The header line is written
        only when the file does not exist yet or is empty.

    Returns
    -------
    None
        The sample sizes and a confirmation line are printed.

    Raises
    ------
    ValueError
        If the WF and CF samples differ in length and therefore cannot be
        paired.

    Notes
    -----
    Every call appends a row, so re-running the notebook adds duplicate rows
    for the same model unless the output file is removed first.
    """
    # Load the per-model results table.
    results = pd.read_csv(sae_path)

    # Both subsets are sorted by (dataset, case) so the flattened samples are
    # built in the same dataset order, FD before SD.
    # NOTE(review): the pairing is purely positional and assumes each dataset
    # contributes equally long count lists to WF and CF. Confirm against the
    # data layout.
    wf_rows = results[results['case'].isin(['WF_FD', 'WF_SD'])].sort_values(by=['dataset', 'case'])
    cf_rows = results[results['case'].isin(['CF_FD', 'CF_SD'])].sort_values(by=['dataset', 'case'])

    sample_wf = transform_column_to_numpy_array(wf_rows, 'persuasion_counts')
    sample_cf = transform_column_to_numpy_array(cf_rows, 'persuasion_counts')

    print(len(sample_wf))
    print(len(sample_cf))

    # A paired test needs one CF observation per WF observation; fail with a
    # message that names the file instead of relying on scipy's generic error.
    if len(sample_wf) != len(sample_cf):
        raise ValueError(
            f"Cannot pair samples from {sae_path}: "
            f"{len(sample_wf)} WF values vs {len(sample_cf)} CF values"
        )

    # Argument order matters: the statistic is based on CF minus WF.
    result = stats.ttest_rel(sample_cf, sample_wf)

    summary = pd.DataFrame(
        [[model_name, "WF vs CF", result.statistic, result.pvalue]],
        columns=["Model", "Metric", "Statistic", "P-Value"],
    )

    # Write the header when starting a new file. An existing but empty file
    # counts as new, otherwise it would end up without column names.
    has_content = os.path.isfile(output_csv_path) and os.path.getsize(output_csv_path) > 0
    summary.to_csv(output_csv_path, mode='a', index=False, header=not has_content)

    print(f"Results saved to {output_csv_path}")


In [3]:
# Run the paired t-test once per model and append each result to one shared CSV.
# The directory and output file are named once instead of being repeated in
# every call.
RESULTS_DIR = "/Users/nils/AutoPersonalizationBench/case3_Echochamber/results"
OUTPUT_CSV = "paired_t_test_two_sided_results_case3.csv"

# (label written to the "Model" column, results file name inside RESULTS_DIR)
MODEL_RESULT_FILES = [
    ("Alpaca", "results_alpaca.csv"),
    ("GPT-4", "results_gpt-4-0613.csv"),
    ("GPT-3.5-instruct", "results_gpt-3.5-turbo-instruct.csv"),
    ("Llama7B", "results_llama2-7b-chat.csv"),
    ("Llama13B", "results_llama2-13b-chat.csv"),
    ("Llama70B", "results_llama2-70b-chat.csv"),
    ("Vicuna7B", "results_vicuna-7b-v1.5.csv"),
    ("Vicuna13B", "results_vicuna-13b-v1.5.csv"),
]

# The list order is the row order in the output CSV.
for model_label, results_file in MODEL_RESULT_FILES:
    perform_t_test_and_save(model_label, f"{RESULTS_DIR}/{results_file}", OUTPUT_CSV)



Results saved to paired_t_test_two_sided_results_case3.csv
Results saved to paired_t_test_two_sided_results_case3.csv
Results saved to paired_t_test_two_sided_results_case3.csv
Results saved to paired_t_test_two_sided_results_case3.csv
Results saved to paired_t_test_two_sided_results_case3.csv
Results saved to paired_t_test_two_sided_results_case3.csv
Results saved to paired_t_test_two_sided_results_case3.csv
Results saved to paired_t_test_two_sided_results_case3.csv


In [18]:
# Gemini is evaluated separately; its row is appended to the same output CSV.
perform_t_test_and_save(
    model_name="Gemini",
    sae_path="/Users/nils/AutoPersonalizationBench/case3_Echochamber/results/results_gemini.csv",
    output_csv_path="paired_t_test_two_sided_results_case3.csv",
)

3664
3664
Results saved to paired_t_test_two_sided_results_case3.csv
