In [1]:
import pandas as pd
from scipy import stats

def _is_testable(frame, col):
    """Return True when *col* exists in *frame* and holds non-boolean numeric data."""
    if col not in frame.columns:
        return False
    dtype = frame[col].dtype
    # Boolean columns count as "numeric" for pandas but cannot be subtracted
    # pairwise by the t-test, so they are excluded explicitly.
    return (
        pd.api.types.is_numeric_dtype(dtype)
        and not pd.api.types.is_bool_dtype(dtype)
    )


def _paired_ttest(first, second):
    """Run a paired t-test on two series aligned by index.

    Rows with a missing value on either side are dropped before testing.

    Returns:
        ``(t_statistic, p_value)``, or ``(None, None)`` when fewer than two
        complete pairs remain.
    """
    pairs = pd.concat([first, second], axis=1).dropna()
    if len(pairs) < 2:
        return None, None
    t_val, p_val = stats.ttest_rel(pairs.iloc[:, 0], pairs.iloc[:, 1])
    return t_val, p_val


def analyze_linguistic_differences(
    cft_path='data_cft.csv',
    dpo_path='data_dpo.csv',
    rejected_path='data_rejected.csv',
    chosen_path='data_chosen.csv',
):
    """Compare features between CFT/DPO and Rejected/Chosen with paired t-tests.

    Each CSV is loaded into a DataFrame. For every column that is present and
    numeric in all four files, two paired t-tests are run: CFT against DPO,
    and Rejected against Chosen. Rows are paired by their position in the
    files (the default index produced by ``pd.read_csv``).

    Args:
        cft_path: Path of the CFT feature CSV.
        dpo_path: Path of the DPO feature CSV.
        rejected_path: Path of the Rejected feature CSV.
        chosen_path: Path of the Chosen feature CSV.

    Returns:
        A DataFrame with one row per tested feature and the columns
        ``Feature``, ``CFT_vs_DPO_t``, ``CFT_vs_DPO_p``, ``Rej_vs_Cho_t`` and
        ``Rej_vs_Cho_p``. A statistic is ``None`` when fewer than two
        complete pairs were available for that comparison.
    """
    df_cft = pd.read_csv(cft_path)
    df_dpo = pd.read_csv(dpo_path)
    df_rejected = pd.read_csv(rejected_path)
    df_chosen = pd.read_csv(chosen_path)

    # Only columns shared by every file can be compared; keep the CFT file's
    # column order so the output ordering stays predictable.
    frames = (df_cft, df_dpo, df_rejected, df_chosen)
    common_cols = [
        col for col in df_cft.columns
        if all(_is_testable(frame, col) for frame in frames)
    ]

    results = []
    for col in common_cols:
        # Test the difference between CFT and DPO.
        t_val1, p_val1 = _paired_ttest(df_cft[col], df_dpo[col])

        # Test the difference between Rejected and Chosen.
        t_val2, p_val2 = _paired_ttest(df_rejected[col], df_chosen[col])

        results.append({
            'Feature': col,
            'CFT_vs_DPO_t': t_val1,
            'CFT_vs_DPO_p': p_val1,
            'Rej_vs_Cho_t': t_val2,
            'Rej_vs_Cho_p': p_val2
        })

    # Passing the column list keeps the headers even when no feature qualified.
    return pd.DataFrame(
        results,
        columns=[
            'Feature',
            'CFT_vs_DPO_t',
            'CFT_vs_DPO_p',
            'Rej_vs_Cho_t',
            'Rej_vs_Cho_p',
        ],
    )

# Run the analysis and print the resulting table of per-feature t statistics
# and p-values for both comparisons.
result_df = analyze_linguistic_differences()
print(result_df)

                Feature  CFT_vs_DPO_t  CFT_vs_DPO_p  Rej_vs_Cho_t  \
0   avg_sentence_length     11.161511  8.884705e-23    114.222829   
1       avg_word_length     23.660092  9.560197e-60    250.339093   
2    complex_word_ratio     25.385804  2.530906e-64    313.800263   
3         pronoun_ratio    -14.973263  1.917589e-34    -95.036412   
4         hedging_ratio     -3.429356  7.354117e-04     16.400062   
5       noun_verb_ratio     15.014844  1.429114e-34     91.418536   
6  nominalization_ratio     19.590836  2.559674e-48    232.746586   

   Rej_vs_Cho_p  
0  0.000000e+00  
1  0.000000e+00  
2  0.000000e+00  
3  0.000000e+00  
4  9.918615e-60  
5  0.000000e+00  
6  0.000000e+00  
