In [1]:
import os
import time
import json
import random

import pandas as pd

from llm_utils import *
from getpass import getpass

# Authenticate with the APIs

In [2]:
# The key is typed in at run time via getpass, so it is never written into the
# notebook source.
# NOTE(review): `genai` is not imported explicitly in this notebook; presumably
# it is exposed by the star import of llm_utils — confirm.
genai.configure(api_key=getpass("Enter API key: "))

# Role message

In [3]:
# System prompt placed as the first ("system") message of every conversation
# built in generate_mfv_condition. The text is sent to the model as-is, so any
# rewording changes what the model is asked to do.
role_message = """You're an artificial intelligence which reacts to vignettes on psychological scales.
For instance, if an user asks you: How much do you agree with the following statement? 'I'm a large language model', 1 - Completely disagree, 2, 3, 4 - Neither agree nor disagree, 5, 6, 7 - Completely agree, you should respond with a number between 1 and 7.
Your response SHOULD NOT contain the number's accompanying text. So, if you select '7', you should just return '7', instead of '7 - Completely agree'. Similarly, if they ask: Are you a large language model?, 0 - No, 1 - Yes, you should respond with '0' or '1', instead of '0 - No' or '1 - Yes'.
DON'T explain your reasoning. I'm ONLY interested in your response to the scale.
Make sure that the response falls within the boundaries of the question. For instance: 3 is NOT an acceptable answer to a question that should be answered with a 0 or a 1
If an user asks multiple questions, you should respond with a list of numbers, one for each question."""

# Models to use

In [4]:
# Maps the agent name stored with each response to the callable that queries
# that model; run_mfq/run_mfv invoke the callable with the chat history list.
# NOTE(review): the call_* functions are not defined in this notebook;
# presumably they come from the star import of llm_utils — confirm.
# Commented-out entries are excluded from the run.
models = {
    "Gemini Pro": call_gemini,
    #"Llama 2 Chat 70b": call_llama2,
    #"Claude 2.1": call_claude2_1,
    #"GPT-4": call_gpt4,
}

Loading MFQ info

In [5]:
# Load the MFQ definition (prompts, questions and item keys used by
# generate_mfq_stimuli). The encoding is given explicitly so the file is
# decoded the same way on every platform instead of following the locale
# default.
with open("./stimuli/mfq.json", encoding="utf-8") as f:
    mfq = json.load(f)

Loading MFV prompts

In [6]:
# Read the MFV configuration from mfv.json (intro text, vignette table source
# and validated codes are looked up from it later). The encoding is given
# explicitly so decoding does not depend on the platform's locale default.
with open("./stimuli/mfv.json", encoding="utf-8") as f:
    mfv_info = json.load(f)

Loading vignettes

In [8]:
# First table parsed from the HTML referenced in the MFV config, with one
# extra column holding the code assigned to each row.
original_vignettes = pd.read_html(mfv_info["vignettes_html"])[0].assign(
    mfv_code=mfv_info["validated_codes"]
)

# Load the vignettes used in Portuguese (first sheet of the workbook) and
# pull out the two columns needed later.
mfv_pt = pd.read_excel("stimuli/mfvignettes_pt.xlsx", sheet_name=0)
double_validated_codes = mfv_pt["MFV Code"].to_list()
foundations = mfv_pt["Foundation"].values

In [9]:
def generate_mfq_stimuli(randomize=False):
    """Return the two MFQ prompts together with the item keys for each part.

    Reads the module-level ``mfq`` dictionary.

    Args:
        randomize: When exactly ``False`` (the default), the ready-made
            prompts stored under ``mfq_part1`` / ``mfq_part2`` are returned
            as they are. Any other value shuffles the questions of each part
            and builds the prompts from the shuffled questions.

    Returns:
        dict with ``p1`` / ``p2`` (prompt text of each part) and
        ``items1`` / ``items2`` (item keys of each part). In the shuffled
        case the keys are permuted together with the questions.
    """
    if randomize is not False:
        from sklearn.utils import shuffle

        # Passing questions and keys to the same call applies one common
        # permutation to both, so key k still belongs to question k.
        questions1, keys1 = shuffle(mfq["part1_questions"], mfq["part1_item_key"])
        questions2, keys2 = shuffle(mfq["part2_questions"], mfq["part2_item_key"])

        # NOTE(review): the shuffled prompts consist only of the numbered
        # questions; confirm that no instruction/scale text from
        # mfq_part1 / mfq_part2 needs to be carried over.
        numbered1 = [f"{n}. {q}" for n, q in enumerate(questions1, start=1)]
        # Part 2 numbering continues where part 1 stopped.
        numbered2 = [
            f"{n}. {q}" for n, q in enumerate(questions2, start=len(keys1) + 1)
        ]
        return {
            "p1": "\n".join(numbered1),
            "p2": "\n".join(numbered2),
            "items1": keys1,
            "items2": keys2,
        }

    return {
        "p1": mfq["mfq_part1"],
        "p2": mfq["mfq_part2"],
        "items1": mfq["part1_item_key"],
        "items2": mfq["part2_item_key"],
    }
    
def generate_mfv_stimuli(randomize=False):
    """Build the MFV prompt: the intro text followed by a numbered vignette list.

    Reads the module-level ``mfv_info`` and ``original_vignettes``.

    Args:
        randomize: When exactly ``False`` (the default), vignettes keep the
            row order of ``original_vignettes``. Any other value presents
            the rows in random order.

    Returns:
        dict with ``prompt`` (``pre_mfv`` text, a blank line, then one
        numbered line per vignette) and ``mfv_codes`` (the ``mfv_code``
        values in the same row order as the numbered lines).
    """
    intro = mfv_info["pre_mfv"]

    if randomize is False:
        vignettes = original_vignettes
    else:
        # frac=1 draws every row once, i.e. the whole table in random order.
        vignettes = original_vignettes.sample(frac=1).copy()

    # Number by position (1..n) rather than by the frame's index labels,
    # which no longer run 0..n-1 after filtering or sampling.
    positions = (vignettes.reset_index().index + 1).astype(str)
    numbered_lines = (positions + ". " + vignettes["Scenario"]).to_list()

    return {
        "prompt": intro + "\n\n" + "\n".join(numbered_lines),
        "mfv_codes": vignettes["mfv_code"].to_list(),
    }

# Generation - MfVs

In [10]:
# Re-read the vignette table (first table parsed from the configured HTML)
# and attach the code of each row as an extra column.
original_vignettes = pd.read_html(mfv_info["vignettes_html"])[0].assign(
    mfv_code=mfv_info["validated_codes"]
)

In [11]:
# Load the vignettes used in Portuguese (first sheet of the workbook).
# NOTE(review): these are the same three statements as in the
# "Loading vignettes" cell earlier in the notebook.
mfv_pt = pd.read_excel("stimuli/mfvignettes_pt.xlsx", sheet_name=0)
double_validated_codes = mfv_pt["MFV Code"].to_list()
foundations = mfv_pt["Foundation"].values

Keeping only the vignettes validated in the Brazilian replication

In [12]:
# Keep only the vignettes whose code also appears in the Brazilian
# replication sheet. The filtered frame is bound back to the name instead of
# using inplace=True, so the cell never mutates a frame object that another
# cell may have displayed or still references.
original_vignettes = original_vignettes.query("mfv_code in @double_validated_codes")

In [13]:
def run_mfq(model, chat_history):
    """Ask both MFQ parts (items in random order) inside an ongoing conversation.

    Args:
        model: Callable invoked as ``model(chat_history)``; whatever it
            returns is stored as the assistant reply.
        chat_history: List of ``{"role": ..., "content": ...}`` dicts.
            It is extended in place with the prompts and replies.

    Returns:
        A one-element tuple whose single item is a dict with ``part1`` and
        ``part2`` (the replies; ``part2`` is ``None`` when the first reply is
        falsy), ``chat_history`` (the same list object that was passed in)
        and ``part1_order`` / ``part2_order`` (item keys in presentation
        order).
    """
    stimuli = generate_mfq_stimuli(randomize=True)
    part1_prompt, part2_prompt = stimuli["p1"], stimuli["p2"]

    chat_history.append({"role": "user", "content": part1_prompt})
    first_reply = model(chat_history)

    # Both turns are recorded even when the first reply is empty/None.
    # NOTE(review): in that case the history ends with an assistant turn whose
    # content is falsy plus an unanswered part-2 prompt — confirm that later
    # calls on the same history cope with this.
    chat_history.append({"role": "assistant", "content": first_reply})
    chat_history.append({"role": "user", "content": part2_prompt})

    second_reply = None
    if first_reply:
        second_reply = model(chat_history)
        chat_history.append({"role": "assistant", "content": second_reply})

    outcome = {
        "part1": first_reply,
        "part2": second_reply,
        "chat_history": chat_history,
        "part1_order": stimuli["items1"],
        "part2_order": stimuli["items2"],
    }
    # NOTE(review): the result is wrapped in a 1-tuple (run_mfv returns a bare
    # dict). Kept as-is because the saved JSON shape depends on it; confirm
    # whether the wrapping is intentional.
    return (outcome,)


def run_mfv(model, chat_history):
    """Ask the MFV prompt (vignettes in random order) inside an ongoing conversation.

    Args:
        model: Callable invoked as ``model(chat_history)``; whatever it
            returns is stored as the assistant reply.
        chat_history: List of ``{"role": ..., "content": ...}`` dicts.
            It is extended in place with the prompt and the reply.

    Returns:
        dict with ``mfv`` (the reply), ``chat_history`` (the same list object
        that was passed in) and ``mfv_codes`` (vignette codes in presentation
        order).
    """
    # Named `stimuli` so the module-level `mfv_info` is not shadowed.
    stimuli = generate_mfv_stimuli(randomize=True)

    chat_history.append({"role": "user", "content": stimuli["prompt"]})
    reply = model(chat_history)
    chat_history.append({"role": "assistant", "content": reply})

    outcome = {"mfv": reply, "chat_history": chat_history}
    outcome["mfv_codes"] = stimuli["mfv_codes"]
    return outcome


def generate_mfv_condition(model, agent_name, id, condition):
    """Run one conversation containing both tasks, in the order given by ``condition``.

    Args:
        model: Callable forwarded to ``run_mfq`` / ``run_mfv``.
        agent_name: Stored under ``agent`` in the result.
        id: Stored under ``id`` in the result (the name shadows the builtin
            but is part of the signature).
        condition: ``"qv"`` runs the MFQ first and then the MFV; ``"vq"``
            runs them the other way round.

    Returns:
        dict with ``agent``, ``id``, ``condition``, ``mfq`` (return value of
        ``run_mfq``) and ``mfv`` (return value of ``run_mfv``).

    Raises:
        ValueError: If ``condition`` is neither ``"qv"`` nor ``"vq"``.
    """
    if condition not in ("qv", "vq"):
        raise ValueError("Condition must be 'qv' or 'vq'")

    # A single history list is shared by both tasks, so the second task is
    # answered with the first task's exchange already in the conversation.
    chat_log = [{"role": "system", "content": role_message}]

    sequence = [("mfq", run_mfq), ("mfv", run_mfv)]
    if condition == "vq":
        sequence.reverse()

    answers = {}
    for label, task in sequence:
        answers[label] = task(model, chat_log)

    return {
        "agent": agent_name,
        "id": id,
        "condition": condition,
        "mfq": answers["mfq"],
        "mfv": answers["mfv"],
    }

In [14]:
def run_experiment(n):
    """Collect responses for every repetition, model and task order.

    Args:
        n: Number of repetitions; the repetition index is passed on as the
            ``id`` of each result.

    Returns:
        List of the dicts returned by ``generate_mfv_condition``, ordered by
        repetition, then by entry in ``models``, then ``"qv"`` before
        ``"vq"``.
    """
    return [
        generate_mfv_condition(model, agent, i, condition)
        for i in range(n)
        for agent, model in models.items()
        for condition in ("qv", "vq")
    ]

In [16]:
r = run_experiment(5)

# Make sure the output folder exists, so a completed run is not followed by a
# FileNotFoundError when the results are written.
os.makedirs("responses", exist_ok=True)

# ensure_ascii=False writes non-ASCII characters verbatim, so the file is
# opened as UTF-8 explicitly rather than with the platform's locale encoding,
# which may be unable to represent them.
with open("responses/mfv_test_gemini.json", "w", encoding="utf-8") as f:
    json.dump(r, f, indent=4, ensure_ascii=False)