In [45]:
import json
from itertools import combinations
from pathlib import Path

import numpy as np
import pandas as pd
from pingouin import cronbach_alpha
from scipy.stats import pearsonr, spearmanr, ttest_ind



# Loading data

In [2]:
# Both response tables are read from the local "data" folder
data_dir = Path("").parent / "data"

mfq = pd.read_csv(data_dir / "mfq.csv")
mfv = pd.read_csv(data_dir / "mfv.csv")

  return warn(


# MFQ Analysis

Converting mfq to wide

In [3]:
# Preview the first two rows of the MFQ response table
mfq.head(2)

Unnamed: 0,agent,id,condition,answer,code
0,GPT-4,0,qv,1,traditions
1,GPT-4,0,qv,0,math


In [4]:
# Reshape MFQ from long to wide: one row per (agent, condition, id),
# one column per item code, then restore the identifiers as ordinary columns
respondent_keys = ["agent", "condition", "id"]
mfq_by_respondent = mfq.pivot(index=respondent_keys, columns="code", values="answer")
mfq_wide = mfq_by_respondent.reset_index()

In [5]:
# Rows flagged by the two filter items: "math" answered 3 or above, or "good" answered 2 or below
flagged_rows = mfq_wide.query("math >= 3 or good <= 2")

# Keep only the identifiers of the flagged rows for later reuse
invalid_responses = flagged_rows[["agent", "condition", "id"]].copy()

# Display the flagged rows together with the two answers that triggered the filter
flagged_rows[["agent", "condition", "id", "math", "good"]]

code,agent,condition,id,math,good
131,Claude 2.1,vq,60,3,4


In [6]:
# Keep only the rows that pass both filter items, then discard the two filter columns
rows_passing_filter = mfq_wide.query("math < 3 and good > 2")
mfq_wide = rows_passing_filter.drop(columns=["math", "good"])

latex for results

In [7]:
# Number of remaining generations per agent (rows) and condition (columns)
valid_counts = (
    mfq_wide.groupby(["agent", "condition"])
    .size()
    .reset_index()
    .pivot_table(index="agent", columns="condition", values=0)
)

# Optional LaTeX export, kept disabled:
# valid_counts.style.to_latex(
#     hrules=True,
#     label="tab:valid_generations",
#     caption="Valid generations for each model per condition",
# )
print(valid_counts)

condition     qv    vq
agent                 
Claude 2.1  92.0  68.0
GPT-4       98.0  91.0
Gemini Pro   NaN   1.0


In [8]:
# Number of distinct answers per item (rows) for every agent/condition pair (columns)
answers_without_id = mfq_wide.drop(columns=["id"])
answers_without_id.groupby(["agent", "condition"]).nunique().T

agent,Claude 2.1,Claude 2.1,GPT-4,GPT-4,Gemini Pro
condition,qv,vq,qv,vq,vq
code,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
animal,4,2,2,1,1
betray,5,4,6,3,1
chaos,5,2,6,4,1
chastity,4,3,5,4,1
compassion,3,2,2,2,1
cruel,2,2,3,1,1
decency,4,4,6,4,1
disgusting,5,4,5,5,1
emotionally,4,3,4,2,1
fairly,2,2,2,1,1


Discarding Gemini

In [9]:
# Gemini Pro rows are excluded from everything that follows
mfq_wide = mfq_wide.query("agent != 'Gemini Pro'")

Generate a dictionary mapping each foundation to the keys of its items

In [10]:
# Load the MFQ stimulus definition.
# Note: this rebinds `mfq`, which until now held the response table.
with open("./stimuli/mfq.json") as stimuli_file:
    mfq = json.load(stimuli_file)

# foundation -> list of item keys (part 1 items first, then part 2);
# entries whose foundation is None are skipped
questions_dict = {}
for entry in mfq["part1_item_key"] + mfq["part2_item_key"]:
    foundation_name = entry[1]
    if foundation_name is not None:
        questions_dict.setdefault(foundation_name, []).append(entry[0])

Create an empty dataframe to fill with each foundation per experiment

In [11]:
# Start the per-respondent foundation table with the identifying columns only
df_foundations = pd.DataFrame(
    {column: mfq_wide[column] for column in ("agent", "id", "condition")}
)

In [12]:
# Show the foundation -> item keys mapping
questions_dict

{'harm': ['emotionally', 'weak', 'cruel', 'compassion', 'animal', 'kill'],
 'fairness': ['treated', 'unfairly', 'rights', 'fairly', 'justice', 'rich'],
 'ingroup': ['lovecountry', 'betray', 'loyalty', 'history', 'family', 'team'],
 'authority': ['respect',
  'traditions',
  'chaos',
  'kidrespect',
  'sexroles',
  'soldier'],
 'purity': ['decency',
  'disgusting',
  'god',
  'harmlessdg',
  'unnatural',
  'chastity']}

In [13]:
# Cronbach's alpha for every MFQ foundation: per agent across all conditions
# ("overall") and per agent within each condition, computed for the full item set
# and for each half of it.
alphas = []

for foundation, questions in questions_dict.items():
    # Foundation score: row-wise mean of the foundation's items
    df_foundations[f"MFQ_{foundation.lower()}"] = mfq_wide[questions].mean(axis=1)

    # part 1 (questions[:3]) and part 2 (questions[3:])
    item_sets = [
        ("MFQ", questions),
        ("MFQ - Part 1", questions[:3]),
        ("MFQ - Part 2", questions[3:]),
    ]

    for agent in mfq_wide["agent"].unique():
        agent_df = mfq_wide.query("agent == @agent")

        # "overall" first, followed by one subset per condition
        subsets = [("overall", agent_df)]
        for condition in mfq_wide["condition"].unique():
            subsets.append((condition, agent_df.query("condition == @condition")))

        for subset_label, subset_df in subsets:
            for scale, items in item_sets:
                alphas.append(
                    [
                        scale,
                        foundation.title(),
                        subset_label,
                        agent,
                        cronbach_alpha(subset_df[items])[0],
                    ]
                )

cronbach_df = pd.DataFrame(
    alphas, columns=["scale", "foundation", "condition", "agent", "alpha"]
)

In [14]:
# Preview the first two rows of the alpha table
cronbach_df.head(2)

Unnamed: 0,scale,foundation,condition,agent,alpha
0,MFQ,Harm,overall,Claude 2.1,0.478099
1,MFQ - Part 1,Harm,overall,Claude 2.1,0.667066


# Processing MFVs

Loading code-foundation relation

In [15]:
# Read the first sheet of the vignette workbook and build a lookup
# from "MFV Code" to "Foundation"
mfv_pt = pd.read_excel("stimuli/mfvignettes_pt.xlsx", sheet_name=0)
foundation_by_code = mfv_pt.set_index("MFV Code")["Foundation"]
foundations = foundation_by_code.to_dict()

In [16]:
# Attach the foundation of every vignette code
mfv["foundation"] = mfv["mfv_codes"].map(foundations)

# Gemini Pro is excluded here as well
mfv.query("agent != 'Gemini Pro'", inplace=True)

# Remove every row whose (agent, condition, id) appears in invalid_responses
invalid_keys = invalid_responses.apply(tuple, axis=1)
row_keys = mfv[["agent", "condition", "id"]].apply(tuple, axis=1)
rows_to_drop = mfv[row_keys.isin(invalid_keys)].index
mfv.drop(rows_to_drop, inplace=True)

Checking for correct answer effect

In [17]:
# Vignettes that received exactly one distinct answer within an agent/condition pair
distinct_answers = mfv.groupby(["agent", "condition", "mfv_codes"]).agg(
    {"mfv": "nunique"}
)
distinct_answers.query("mfv == 1")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mfv
agent,condition,mfv_codes,Unnamed: 3_level_1
GPT-4,qv,201,1
GPT-4,qv,208,1
GPT-4,vq,201,1
GPT-4,vq,208,1
GPT-4,vq,804,1


TODO: fix the mean so that it is computed for each case

In [18]:
# Cronbach's alpha for the MFV: per foundation and agent, overall and per condition.
# The same loop also adds one MFV_<foundation> mean column to df_foundations.
respondent_index = ["agent", "id", "condition"]
alphas_mfv = []

for f in mfv["foundation"].unique():
    # Mean answer over the foundation's vignettes for every respondent
    foundation_wide = mfv.query("foundation == @f").pivot(
        index=respondent_index, columns="mfv_codes", values="mfv"
    )
    foundation_mean = foundation_wide.mean(axis=1).rename(f"MFV_{f}").reset_index()
    df_foundations = df_foundations.merge(
        foundation_mean, on=respondent_index, how="left"
    )

    for a in mfv["agent"].unique():
        # Wide table (one column per vignette code) restricted to this agent
        df_agent = mfv.query(f"foundation == '{f}' and agent == '{a}'").pivot(
            index=respondent_index, columns="mfv_codes", values="mfv"
        )
        overall_alpha = cronbach_alpha(df_agent)[0]
        alphas_mfv.append(["MFV", "overall", f, a, overall_alpha])

        for condition in mfv["condition"].unique():
            df_condition = df_agent.query(f"condition == '{condition}'")
            condition_alpha = cronbach_alpha(df_condition)[0]
            alphas_mfv.append(["MFV", condition, f, a, condition_alpha])

cronbach_df_mfv = pd.DataFrame(
    alphas_mfv, columns=["scale", "condition", "foundation", "agent", "alpha"]
)

# Stack the MFV alphas under the MFQ alphas (columns are matched by name)
cronbach_df = pd.concat([cronbach_df, cronbach_df_mfv], axis=0)

# Measuring pairwise correlation

In [19]:
def _correlation_row(agent, scale, foundation, condition, wide, first, second):
    """Build one result row with the Pearson and Spearman results for two columns.

    Parameters
    ----------
    agent, scale, foundation, condition
        Labels copied unchanged into the row.
    wide : pd.DataFrame
        Wide table holding one column per item.
    first, second
        Labels of the two columns of ``wide`` to correlate.

    Returns
    -------
    list
        ``[agent, scale, foundation, condition, first, second]`` followed by the
        two values unpacked from ``pearsonr`` and the two unpacked from
        ``spearmanr``.
    """
    x, y = wide[first], wide[second]
    return [
        agent,
        scale,
        foundation,
        condition,
        first,
        second,
        *pearsonr(x, y),
        *spearmanr(x, y),
    ]


corr_data = []

# calculate pairwise correlation for MFQ
for foundation, questions in questions_dict.items():
    for agent in mfq_wide["agent"].unique():
        # The agent subset does not depend on the item pair, so build it once
        agent_df = mfq_wide.query("agent == @agent")
        subsets = [("overall", agent_df)]
        for condition in mfq_wide["condition"].unique():
            subsets.append((condition, agent_df.query("condition == @condition")))

        # Every unordered pair of items; for each pair "overall" comes first,
        # then one row per condition
        for first, second in combinations(questions, 2):
            for subset_label, subset_df in subsets:
                corr_data.append(
                    _correlation_row(
                        agent,
                        "MFQ",
                        foundation.title(),
                        subset_label,
                        subset_df,
                        first,
                        second,
                    )
                )

# calculate pairwise correlation for MFV
for f in mfv["foundation"].unique():
    for agent in mfv["agent"].unique():
        # Filter on the loop variable `agent`. The previous `@a` picked up a
        # variable left over from an earlier cell, so every agent was given the
        # correlations of whichever agent that cell processed last.
        df_agent = mfv.query("foundation == @f and agent == @agent").pivot(
            index=["agent", "id", "condition"], columns="mfv_codes", values="mfv"
        )
        subsets = [("overall", df_agent)]
        for condition in mfv["condition"].unique():
            # `condition` is an index level of the pivoted table
            subsets.append((condition, df_agent.query("condition == @condition")))

        for first, second in combinations(df_agent.columns, 2):
            for subset_label, subset_df in subsets:
                corr_data.append(
                    _correlation_row(
                        agent, "MFV", f, subset_label, subset_df, first, second
                    )
                )

corr_df = pd.DataFrame(
    corr_data,
    columns=[
        "agent",
        "scale",
        "foundation",
        "condition",
        "q1",
        "q2",
        "pearson_r",
        "pearson_p",
        "spearman_r",
        "spearman_p",
    ],
)



# Loading and processing data from original MFV Study 2

## Creating CSV data correspondence

In [20]:
# Correspondence between MFV vignette code (key) and column name (value).
# NOTE(review): "care10" is listed for both 106 and 114, and no code maps to
# "care14"; one of the two entries is presumably a typo — confirm against the
# original study's codebook. When this dict is inverted (value -> key), only one
# of the two codes sharing "care10" survives.
mfvcode_correspondence = {
    101: "care12",
    102: "care8",
    103: "care11",
    104: "care6",
    105: "care5",
    106: "care10",
    107: "care7",
    108: "care16",
    109: "care4",
    110: "care1",
    111: "care2",
    112: "care3",
    113: "care13",
    114: "care10",
    115: "care9",
    116: "care15",
    201: "carepa9",
    202: "carepa2",
    203: "carepa1",
    204: "carepa5",
    205: "carepa6",
    206: "carepa4",
    207: "carepa7",
    208: "carepa3",
    301: "careph12",
    302: "careph15",
    303: "careph14",
    401: "fair8",
    402: "fair1",
    403: "fair2",
    404: "fair10",
    405: "fair6",
    406: "fair3",
    407: "fair4",
    408: "fair14",
    409: "fair5",
    410: "fair17",
    411: "fair16",
    412: "fair15",
    501: "libt2",
    502: "libt7",
    503: "libt5",
    504: "libt9",
    505: "libt6",
    506: "libt13",
    507: "libt17",
    508: "libt12",
    509: "libt1",
    510: "libt11",
    511: "libt10",
    601: "auth3",
    602: "auth9",
    603: "auth8",
    604: "auth11",
    605: "auth14",
    606: "auth16",
    607: "auth12",
    608: "auth13",  # ??
    609: "auth15",
    610: "auth5",
    611: "auth10",
    612: "auth1",
    613: "auth17",
    614: "auth6",
    701: "loya3",
    702: "loya7",
    703: "loya6",
    704: "loya13",
    705: "loya1",
    706: "loya14",
    707: "loya9",
    708: "loya2",
    709: "loya8",
    710: "loya16",
    711: "loya4",
    712: "loya12",
    713: "loya10",
    714: "loya15",
    715: "loya11",
    716: "loya5",
    801: "sanc1",
    802: "sanc8",
    803: "sanc5",
    804: "sanc3",
    805: "sanc13",
    806: "sanc9",
    807: "sanc14",
    808: "sanc7",
    809: "sanc15",
    810: "sanc11",
}

In [21]:
# First element of every entry is the item key
mfq_rel_items = [entry[0] for entry in mfq["part1_item_key"]]
mfq_jdg_items = [entry[0] for entry in mfq["part2_item_key"]]

# Item key -> column name. The 16 positions are split into two groups of eight,
# and both the group and the position inside it are numbered from 1.
mfq_correspondence = {}
for position in range(16):
    group_index, offset = divmod(position, 8)
    q_group, col = group_index + 1, offset + 1
    mfq_correspondence[mfq_rel_items[position]] = f"mfq_rel{q_group}_{col}"
    mfq_correspondence[mfq_jdg_items[position]] = f"mfq_jdg{q_group}_{col}"

## Loading human data

In [22]:
human = pd.read_csv(Path("./mfv_original_data") / "MFV_Study2.csv")

# MFQ: keep the columns named in mfq_correspondence and rename them to item keys
mfq_item_by_column = {column: item for item, column in mfq_correspondence.items()}
human_mfq = human[
    [column for column in human.columns if column in mfq_item_by_column]
].rename(columns=mfq_item_by_column)

# MFV: keep the columns named in mfvcode_correspondence and rename them to codes
mfv_code_by_column = {
    column: code for code, column in mfvcode_correspondence.items()
}
human_mfv = human[
    [column for column in human.columns if column in mfv_code_by_column]
].rename(columns=mfv_code_by_column)

In [23]:
# Preview the first two rows of the cleaned MFV table
mfv.head(2)

Unnamed: 0,agent,id,condition,mfv,mfv_codes,foundation
0,GPT-4,0,qv,3,113,Care (e)
1,GPT-4,0,qv,2,603,Authority


In [24]:
alphas_human = []

# MFV alphas: one per foundation, using the vignette codes present in `mfv`
for f in mfv["foundation"].unique():
    codes = mfv.query("foundation == @f")["mfv_codes"].unique().tolist()
    alphas_human.append(
        ["MFV", f, "overall", "Human", cronbach_alpha(human_mfv[codes])[0]]
    )

# MFQ alphas: full item set plus each half (first three items / remaining items)
for foundation, questions in questions_dict.items():
    item_sets = (
        ("MFQ", questions),
        ("MFQ - Part 1", questions[:3]),
        ("MFQ - Part 2", questions[3:]),
    )
    for scale, items in item_sets:
        alphas_human.append(
            [
                scale,
                foundation.title(),
                "overall",
                "Human",
                cronbach_alpha(human_mfq[items])[0],
            ]
        )

human_alphas = pd.DataFrame(
    alphas_human, columns=["scale", "foundation", "condition", "agent", "alpha"]
)
    

In [25]:
# join all alphas
# NOTE: cronbach_df is rebound to itself plus human_alphas, so running this cell
# a second time appends the human rows again.
cronbach_df = pd.concat([cronbach_df, human_alphas], axis=0)

T-Test for alpha distribution between conditions

In [34]:
# Alphas of the non-human agents, one array per condition
qv_alpha, vq_alpha = (
    cronbach_df.query(f"condition == '{cond}' and agent != 'Human'")["alpha"].values
    for cond in ("qv", "vq")
)

# Independent-samples t-test between the two conditions
ttest_ind(qv_alpha, vq_alpha)

Ttest_indResult(statistic=1.8834069285167483, pvalue=0.063025076728054)

For individual agents

# Saving data to CSV

In [27]:
# save data
p = Path("results")
p.mkdir(exist_ok=True)

cronbach_df.to_csv(p / "cronbach.csv", index=False)
corr_df.to_csv(p / "correlations.csv", index=False)
df_foundations.to_csv(p / "foundation_cond_agg.csv", index=False)
mfq_wide.to_csv(p / "mfq_wide.csv.zip", index=False)

# pivot() moves agent/condition/id into the index; reset_index() turns them back
# into columns so they are still written with index=False. Without it the saved
# rows carry no identifiers.
mfv.pivot(
    index=["agent", "condition", "id"],
    columns="mfv_codes",
    values="mfv",
).reset_index().to_csv(p / "mfv_wide.csv.zip", index=False)