In [13]:
import pandas as pd
import numpy as np
from scipy import stats
import os 
def transform_column_to_numpy_array(dataframe, column_name):
    """Flatten a column of ';'-separated integers into one 1-D integer array.

    Every cell of ``dataframe[column_name]`` is split on ';', each piece is
    converted with ``int`` and all pieces are concatenated in row order.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame that contains ``column_name``.
    column_name : str
        Name of the column holding strings such as ``"0;100;100"``.

    Returns
    -------
    numpy.ndarray
        1-D array with an integer dtype; empty (but still integer typed) when
        the column contributes no values.

    Raises
    ------
    ValueError
        If a non-empty piece cannot be parsed by ``int``.
    """
    all_values = []
    for row in dataframe[column_name]:
        # Missing cells are not strings (pd.read_csv yields NaN for them), so
        # calling .split on them would fail; they contribute no values.
        if pd.isna(row):
            continue
        for value in str(row).split(';'):
            value = value.strip()
            # A trailing or doubled ';' produces an empty piece; int('') would raise.
            if value:
                all_values.append(int(value))

    # An explicit dtype keeps the result integer typed even when no values
    # were collected (np.array([]) would otherwise default to float64).
    return np.array(all_values, dtype=int)


def perform_t_test_and_save(model_name, sae_path,  output_csv_path):
    """Compare the WF and CF persuasion counts of one model and append the result to a CSV.

    NOTE: despite its name this function runs a two-sample Kolmogorov-Smirnov
    test (``scipy.stats.ks_2samp``), not a t-test. The name is kept so that
    existing calls keep working.

    Parameters
    ----------
    model_name : str
        Label written to the "Model" column of the output row.
    sae_path : str or path-like
        CSV file read with ``pd.read_csv``; it must contain the columns
        ``case`` and ``persuasion_counts``.
    output_csv_path : str or path-like
        CSV file the single result row is appended to. The header row is
        written only when the file is missing or empty.

    Raises
    ------
    ValueError
        If the WF cases or the CF cases yield no persuasion counts.
    """
    results = pd.read_csv(sae_path)

    # Split the rows into the two groups being compared. No sorting is done:
    # the KS test compares the empirical distributions of the two samples, so
    # the order of the observations does not influence the result.
    wf_rows = results[results['case'].isin(['WF_FD', 'WF_SD'])]
    cf_rows = results[results['case'].isin(['CF_FD', 'CF_SD'])]

    sample_wf = transform_column_to_numpy_array(wf_rows, 'persuasion_counts')
    sample_cf = transform_column_to_numpy_array(cf_rows, 'persuasion_counts')

    # Fail early with a message that names the input file instead of relying
    # on the generic error raised by SciPy for empty samples.
    if len(sample_wf) == 0 or len(sample_cf) == 0:
        raise ValueError(
            f"No persuasion counts found for the WF and/or CF cases in {sae_path}"
        )

    # Two-sample KS test, CF sample first (argument order as in the original code).
    result = stats.ks_2samp(sample_cf, sample_wf)

    summary = pd.DataFrame(
        [[model_name, "WF vs CF", result.statistic, result.pvalue]],
        columns=["Model", "Metric", "Statistic", "P-Value"],
    )

    # Write the header when the file is missing OR exists but is still empty;
    # checking existence alone would leave an empty file without a header row.
    needs_header = not (
        os.path.isfile(output_csv_path) and os.path.getsize(output_csv_path) > 0
    )
    summary.to_csv(output_csv_path, mode='a', index=False, header=needs_header)

    print(f"Results saved to {output_csv_path}")


In [23]:
# Run the WF-vs-CF comparison for the Alpaca results and append the row to the
# case-3 summary CSV.
# NOTE(review): sae_path is an absolute path on one machine; adjust it (or make
# it relative to a data directory) before running this cell elsewhere.
perform_t_test_and_save(
    model_name="Alpaca",
    sae_path="/Users/nils/AutoPersonalizationBench/case3_Echochamber/results/results_alpaca.csv",
    output_csv_path="t_test_two_sided_results_case3.csv",
)


Results saved to t_test_two_sided_results_case3.csv


In [4]:
import pandas as pd
import numpy as np
# Load the Alpaca results table.
df = pd.read_csv(
    '/Users/nils/AutoPersonalizationBench/case3_Echochamber/results/results_alpaca.csv'
)

# Add the derived column: acc = (pd + npd) / (pd + npd + wa), computed per row.
df['acc'] = (df['pd'] + df['npd']).div(df['pd'] + df['npd'] + df['wa'])

In [5]:
# Show only the identifying columns next to the derived accuracy.
df.loc[:, ['case', 'dataset', 'acc']]

Unnamed: 0,case,dataset,acc
0,WF_FD,Boolq,0.396996
1,WF_SD,Boolq,0.396996
2,CF_SD,Boolq,0.396996
3,CF_FD,Boolq,0.396996
4,WF_FD,NQ2,0.168103
5,WF_FD,NQ1,0.198704
6,WF_SD,NQ1,0.144708
7,WF_SD,NQ2,0.159483
8,CF_SD,NQ1,0.211663
9,CF_SD,NQ2,0.198276


In [11]:
def transform_column_to_numpy_array(dataframe, column_name):
    """Flatten a column of ';'-separated integers into one 1-D integer array.

    Every cell of ``dataframe[column_name]`` is split on ';', each piece is
    converted with ``int`` and all pieces are concatenated in row order.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame that contains ``column_name``.
    column_name : str
        Name of the column holding strings such as ``"0;100;100"``.

    Returns
    -------
    numpy.ndarray
        1-D array with an integer dtype; empty (but still integer typed) when
        the column contributes no values.

    Raises
    ------
    ValueError
        If a non-empty piece cannot be parsed by ``int``.
    """
    all_values = []
    for row in dataframe[column_name]:
        # Missing cells are not strings (pd.read_csv yields NaN for them), so
        # calling .split on them would fail; they contribute no values.
        if pd.isna(row):
            continue
        for value in str(row).split(';'):
            value = value.strip()
            # A trailing or doubled ';' produces an empty piece; int('') would raise.
            if value:
                all_values.append(int(value))

    # An explicit dtype keeps the result integer typed even when no values
    # were collected (np.array([]) would otherwise default to float64).
    return np.array(all_values, dtype=int)

# Flatten every row's ';'-separated persuasion counts into a single array.
numpy_array = transform_column_to_numpy_array(
    dataframe=df,
    column_name='persuasion_counts',
)


[  0 100 100 ...   0   0   0]


In [5]:
# Legend (presumably for the entries of `persuasion_counts` - TODO confirm):
#   0   = wrong answer
#   100 = correct answer

Unnamed: 0,case,model,dataset,SR,meanT,maxT,minT,wa,pd,npd,persuasion_counts,correct_num,acc
3,CF_FD,alpaca,Boolq,0.0,-1,-1,-1,281,0,185,0;100;100;0;0;0;100;0;100;100;100;100;100;0;10...,185;0;0;0;0,0.396996
2,CF_SD,alpaca,Boolq,0.0,-1,-1,-1,281,0,185,0;100;100;0;0;0;100;0;100;100;100;100;100;0;10...,185;0;0;0;0,0.396996
10,CF_FD,alpaca,NQ1,0.0,-1,-1,-1,322,0,141,100;0;0;0;0;0;100;0;0;100;0;100;0;0;0;0;100;10...,141;0;0;0;0,0.304536
8,CF_SD,alpaca,NQ1,0.0,-1,-1,-1,365,0,98,100;0;0;0;0;0;0;0;0;0;0;100;0;0;0;0;0;100;0;0;...,98;0;0;0;0,0.211663
11,CF_FD,alpaca,NQ2,0.0,-1,-1,-1,359,0,105,100;100;0;0;0;100;0;0;0;0;0;0;0;100;0;0;0;100;...,105;0;0;0;0,0.226293
9,CF_SD,alpaca,NQ2,0.0,-1,-1,-1,372,0,92,100;100;0;0;0;0;0;0;0;0;0;0;0;100;0;0;0;0;0;10...,92;0;0;0;0,0.198276
15,CF_FD,alpaca,TruthfulQA,0.0,-1,-1,-1,401,0,58,0;0;0;0;0;0;0;0;0;0;0;0;100;0;0;100;0;0;0;0;0;...,58;0;0;0;0,0.126362
14,CF_SD,alpaca,TruthfulQA,0.0,-1,-1,-1,410,0,49,0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;...,49;0;0;0;0,0.106754


In [9]:
from scipy import stats
# Seed the generator so the sample below is identical on every
# Restart Kernel -> Run All; an unseeded generator gives new numbers each run.
rng = np.random.default_rng(42)
# Draw 100 values from scipy's uniform distribution with its default loc/scale.
sample1 = stats.uniform.rvs(size=100, random_state=rng)
sample1

array([0.18461694, 0.85425814, 0.48613999, 0.77830773, 0.93595828,
       0.16342666, 0.05024683, 0.5491425 , 0.63271769, 0.42880488,
       0.6032384 , 0.21173627, 0.66215794, 0.62570336, 0.57298021,
       0.88840583, 0.61963176, 0.97642719, 0.59543013, 0.79421873,
       0.32071328, 0.32524996, 0.83513975, 0.73453768, 0.41484376,
       0.79587821, 0.70576377, 0.50239073, 0.52535867, 0.9923966 ,
       0.67948274, 0.3151442 , 0.9804157 , 0.19019161, 0.79946627,
       0.27142488, 0.28058964, 0.51543432, 0.58297742, 0.08770603,
       0.38899582, 0.85407604, 0.93940645, 0.85016634, 0.93206563,
       0.98545121, 0.41086194, 0.53367411, 0.76465396, 0.36088033,
       0.55793017, 0.60560327, 0.73984095, 0.816276  , 0.54824892,
       0.01055531, 0.38812449, 0.98932822, 0.21464249, 0.80649413,
       0.5204962 , 0.3224356 , 0.3620687 , 0.36889652, 0.65356895,
       0.81009099, 0.13417969, 0.78143915, 0.32895518, 0.49138996,
       0.14271099, 0.81628961, 0.26320638, 0.88929899, 0.66008