In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy.stats import pearsonr, spearmanr, ttest_ind
from sklearn.preprocessing import StandardScaler
from statsmodels.iolib.summary2 import summary_col

In [2]:
# Input and output locations, both relative to the current working directory.
data_dir = Path("results")
figs_dir = Path("figs")

# Create the figures directory up front; a pre-existing one is not an error.
figs_dir.mkdir(exist_ok=True)

# Statistics

In [5]:
# Table of alpha values; later cells read its agent, condition and alpha columns.
cronbach_df = pd.read_csv(data_dir.joinpath("cronbach.csv"))

In [6]:
# Claude 2.1: independent two-sample t-test comparing alpha between the
# "qv" and "vq" conditions.
claude_alpha = cronbach_df.loc[cronbach_df["agent"] == "Claude 2.1"]
ttest_ind(
    claude_alpha.loc[claude_alpha["condition"] == "qv", "alpha"].to_numpy(),
    claude_alpha.loc[claude_alpha["condition"] == "vq", "alpha"].to_numpy(),
)

Ttest_indResult(statistic=0.054352978972766676, pvalue=0.9569116669285772)

In [7]:
# GPT-4: independent two-sample t-test comparing alpha between the
# "qv" and "vq" conditions.
gpt4_alpha = cronbach_df.loc[cronbach_df["agent"] == "GPT-4"]
ttest_ind(
    gpt4_alpha.loc[gpt4_alpha["condition"] == "qv", "alpha"].to_numpy(),
    gpt4_alpha.loc[gpt4_alpha["condition"] == "vq", "alpha"].to_numpy(),
)

Ttest_indResult(statistic=2.4887215445023743, pvalue=0.016869329019227434)

Calculating Cohen's d for GPT-4

In [8]:
def cohen_d(x, y):
    """Return Cohen's d for two samples, using the pooled standard deviation.

    The pooled variance weights each sample's unbiased variance (ddof=1) by
    its degrees of freedom.

    Parameters
    ----------
    x, y : sequence or array of numbers
        The two samples. The sign of the result follows ``mean(x) - mean(y)``.

    Returns
    -------
    float
        Standardized mean difference. The pooled variance divides by
        ``len(x) + len(y) - 2``, so that quantity must be non-zero.
    """
    size_x, size_y = len(x), len(y)

    # Unbiased sample variances, computed as the square of the sample std.
    var_x = np.std(x, ddof=1) ** 2
    var_y = np.std(y, ddof=1) ** 2

    pooled_var = ((size_x - 1) * var_x + (size_y - 1) * var_y) / (size_x + size_y - 2)
    mean_diff = np.mean(x) - np.mean(y)
    return mean_diff / np.sqrt(pooled_var)

# Effect size of the qv-vs-vq difference in alpha for GPT-4.
gpt4_rows = cronbach_df[cronbach_df["agent"] == "GPT-4"]
cohen_d(
    gpt4_rows.loc[gpt4_rows["condition"] == "qv", "alpha"].to_numpy(),
    gpt4_rows.loc[gpt4_rows["condition"] == "vq", "alpha"].to_numpy(),
)

0.7503777791625594

ANOVA for agent

In [26]:
# One-way ANOVA of alpha across agents, with "Human" as the reference level
# of the treatment coding.
#
# The formula is stored under its own name rather than `formula`: the
# regression cells further down keep a `.format(...)` template in `formula`,
# and reusing that name here would overwrite the template whenever this cell
# is (re)run after them.
anova_formula = "alpha ~ C(agent, Treatment('Human'))"
model = smf.ols(anova_formula, cronbach_df).fit()
aov_table = sm.stats.anova_lm(model, typ=2)  # Type II sums of squares
print(aov_table)

                                sum_sq     df         F    PR(>F)
C(agent, Treatment('Human'))  0.416204    2.0  3.238227  0.041974
Residual                      9.703895  151.0       NaN       NaN


In [29]:
# Mean alpha per agent. The numeric column is selected explicitly because the
# frame also holds the string column `condition`; since pandas 2.0,
# `groupby(...).mean()` raises a TypeError on non-numeric columns instead of
# silently dropping them. The double brackets keep the result a DataFrame.
cronbach_df.groupby("agent")[["alpha"]].mean()

Unnamed: 0_level_0,alpha
agent,Unnamed: 1_level_1
Claude 2.1,0.597118
GPT-4,0.574852
Human,0.7316


In [27]:
# Full regression summary of `model`, the OLS fit of alpha on agent (reference
# level "Human") created in the ANOVA cell.
model.summary2()

0,1,2,3
Model:,OLS,Adj. R-squared:,0.028
Dependent Variable:,alpha,AIC:,17.3116
Date:,2024-01-21 20:49,BIC:,26.4224
No. Observations:,154,Log-Likelihood:,-5.6558
Df Model:,2,F-statistic:,3.238
Df Residuals:,151,Prob (F-statistic):,0.042
R-squared:,0.041,Scale:,0.064264

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
Intercept,0.7316,0.0540,13.5363,0.0000,0.6248,0.8384
"C(agent, Treatment('Human'))[T.Claude 2.1]",-0.1345,0.0624,-2.1549,0.0328,-0.2578,-0.0112
"C(agent, Treatment('Human'))[T.GPT-4]",-0.1567,0.0624,-2.5117,0.0131,-0.2801,-0.0334

0,1,2,3
Omnibus:,10.872,Durbin-Watson:,1.418
Prob(Omnibus):,0.004,Jarque-Bera (JB):,11.498
Skew:,-0.64,Prob(JB):,0.003
Kurtosis:,2.605,Condition No.:,6.0


# Regression models

In [3]:
# The raw MFQ columns use different labels (harm, ingroup, lowercase names)
# than the MFV columns; map them onto the same foundation terminology.
mfq_renames = {
    "MFQ_harm": "MFQ_Care",
    "MFQ_ingroup": "MFQ_Loyalty",
    "MFQ_authority": "MFQ_Authority",
    "MFQ_purity": "MFQ_Purity",
    "MFQ_fairness": "MFQ_Fairness",
}

df = pd.read_csv(data_dir / "foundation_cond_agg.csv").rename(columns=mfq_renames)

df.head(2)

Unnamed: 0,agent,id,condition,MFQ_Care,MFQ_Fairness,MFQ_Loyalty,MFQ_Authority,MFQ_Purity,MFV_Care (e),MFV_Authority,MFV_Loyalty,MFV_Purity,MFV_Liberty,MFV_Care (p),MFV_Fairness
0,Claude 2.1,0,qv,4.166667,4.833333,2.333333,2.166667,1.333333,2.363636,2.5,2.4,3.571429,2.142857,3.555556,2.888889
1,Claude 2.1,1,qv,4.166667,4.833333,2.5,1.833333,1.5,3.0,2.4,2.6,3.571429,2.714286,3.444444,2.888889


Applying StandardScaler

In [4]:
scaler = StandardScaler()

# Boolean mask over the columns: True for every MFQ* / MFV* score column.
is_score_col = df.columns.str.startswith("MFQ") | df.columns.str.startswith("MFV")

# Standardize the score columns in place. The scaler is fitted on all rows of
# `df` at once, i.e. on every agent pooled together.
df.loc[:, is_score_col] = scaler.fit_transform(df.loc[:, is_score_col])

In [5]:
# Regression template: the outcome name is filled in with `.format(...)` and
# regressed on the five MFQ foundation scores. The outcome is wrapped in Q(...)
# because some MFV column names (e.g. "MFV_Care (e)") are not valid identifiers.
formula = "Q('{}') ~ MFQ_Authority + MFQ_Care + MFQ_Fairness + MFQ_Loyalty + MFQ_Purity"

# Outcome variables: every MFV_* column, in alphabetical order.
dependents = sorted(col for col in df.columns if col.startswith("MFV_"))

In [6]:
# Display the column labels of `df` as a quick check of the available names.
df.columns

Index(['agent', 'id', 'condition', 'MFQ_Care', 'MFQ_Fairness', 'MFQ_Loyalty',
       'MFQ_Authority', 'MFQ_Purity', 'MFV_Care (e)', 'MFV_Authority',
       'MFV_Loyalty', 'MFV_Purity', 'MFV_Liberty', 'MFV_Care (p)',
       'MFV_Fairness'],
      dtype='object')

## Models for Claude 2.1

In [10]:
# Fit one OLS model per MFV outcome using only the Claude 2.1 rows.
# The row filter does not depend on the outcome, so it is evaluated once here
# instead of once per fitted model.
claude_df = df.query("agent == 'Claude 2.1'")

models = [
    smf.ols(formula=formula.format(dependent), data=claude_df).fit()
    for dependent in dependents
]

# Side-by-side coefficient table (one column per outcome) with significance stars.
output = summary_col(models, stars=True)

output

0,1,2,3,4,5,6,7
,Q('MFV_Authority'),Q('MFV_Care (e)'),Q('MFV_Care (p)'),Q('MFV_Fairness'),Q('MFV_Liberty'),Q('MFV_Loyalty'),Q('MFV_Purity')
Intercept,-0.7571***,-0.4074***,-0.9945***,-0.9677***,-0.6123***,0.2053,-0.8422***
,(0.1130),(0.1422),(0.1116),(0.0954),(0.1498),(0.1770),(0.1515)
MFQ_Authority,-0.0053,-0.0665,-0.1499,-0.0907,0.0744,0.0213,-0.1819
,(0.1025),(0.1290),(0.1013),(0.0865),(0.1359),(0.1606),(0.1374)
MFQ_Care,0.0389,0.0829,0.0090,0.0311,-0.0032,-0.0260,0.0178
,(0.0917),(0.1155),(0.0907),(0.0775),(0.1217),(0.1438),(0.1230)
MFQ_Fairness,-0.1288**,-0.2014***,-0.1075*,-0.0650,-0.1256*,-0.1472*,-0.1187
,(0.0557),(0.0701),(0.0551),(0.0470),(0.0739),(0.0873),(0.0747)
MFQ_Loyalty,0.0755,0.1486,0.0394,0.0191,0.0396,0.2095,0.0669


## GPT-4

In [11]:
# Fit one OLS model per MFV outcome using only the GPT-4 rows.
# The row filter does not depend on the outcome, so it is evaluated once here
# instead of once per fitted model.
gpt4_df = df.query("agent == 'GPT-4'")

models = [
    smf.ols(formula=formula.format(dependent), data=gpt4_df).fit()
    for dependent in dependents
]

# Side-by-side coefficient table (one column per outcome) with significance stars.
output = summary_col(models, stars=True)

output

0,1,2,3,4,5,6,7
,Q('MFV_Authority'),Q('MFV_Care (e)'),Q('MFV_Care (p)'),Q('MFV_Fairness'),Q('MFV_Liberty'),Q('MFV_Loyalty'),Q('MFV_Purity')
Intercept,0.6624***,0.4144***,0.7694***,0.7370***,0.5577***,-0.3263**,0.5310***
,(0.0884),(0.1315),(0.0400),(0.0514),(0.1035),(0.1380),(0.0708)
MFQ_Authority,0.0390,-0.0158,-0.0123,-0.0156,-0.0856,-0.3150**,-0.0212
,(0.0818),(0.1217),(0.0370),(0.0475),(0.0958),(0.1277),(0.0655)
MFQ_Care,0.0760,0.0506,0.0563,0.1262*,-0.0106,0.3611**,0.2184**
,(0.1108),(0.1649),(0.0501),(0.0644),(0.1298),(0.1730),(0.0887)
MFQ_Fairness,-0.0678,0.0167,0.0057,-0.0368,0.0391,-0.1723*,-0.1693***
,(0.0608),(0.0904),(0.0275),(0.0353),(0.0711),(0.0948),(0.0486)
MFQ_Loyalty,0.0020,-0.0523,-0.0048,-0.0324,0.0180,0.4925***,0.0608
