In [1]:
import os
import pandas as pd
import numpy as np
from glob import glob

from moral_lens.dilemma import DilemmaRunner
from moral_lens.judge import JudgeRunner

[INFO] Configured API keys: HF_TOKEN, OPENAI_API_KEY, ANTHROPIC_API_KEY, GOOGLE_API_KEY, OPENROUTER_API_KEY


## Query models

In [None]:
# Decision models queried for the main experiment. Entries that are commented
# out were excluded; the trailing note on each one records why.
# Every active ID appears exactly once: the qwen3 ":nothink" pair used to be
# listed twice, which made the run loops visit those two models a second time.
decision_model_ids = [
    # # # Standard Models # # #
    "gpt-4.1-2025-04-14",
    "gpt-4.1-mini-2025-04-14",
    "gpt-4.1-nano-2025-04-14",
    "gpt-4o-2024-11-20",
    "gpt-4o-2024-08-06",
    "gpt-4o-2024-05-13",
    "gpt-4o-mini-2024-07-18",
    "gpt-3.5-turbo-0125",
    "gpt-3.5-turbo-1106",
    # "gpt-3.5-turbo-0613", # deprecated

    "google/gemini-2.5-flash-preview-04-17",
    "google/gemini-2.0-flash-001",
    "google/gemini-2.0-flash-lite-001",
    "google/gemini-pro-1.5",
    "google/gemini-flash-1.5",
    "google/gemini-flash-1.5-8b",

    "meta-llama/llama-4-maverick",
    "meta-llama/llama-4-scout",
    "meta-llama/llama-3.3-70b-instruct",
    # "meta-llama/llama-3.1-405b-instruct", # 83% approval rate
    "meta-llama/llama-3.1-70b-instruct",
    "meta-llama/llama-3.1-8b-instruct",
    "meta-llama/llama-3-70b-instruct",
    "meta-llama/llama-3-8b-instruct",
    # "meta-llama/llama-2-70b-chat", # no valid responses in early stages

    # "anthropic/claude-3.7-sonnet:beta", # not run
    # "anthropic/claude-3.5-sonnet:beta", # not run
    "anthropic/claude-3.5-sonnet-20240620:beta",
    "anthropic/claude-3.5-haiku:beta",
    # "anthropic/claude-3-sonnet:beta", # not run
    # "anthropic/claude-3-haiku:beta", # 79% approval rate

    "microsoft/phi-4",
    # "microsoft/phi-3.5-mini-128k-instruct", # not run
    # "microsoft/phi-3-medium-128k-instruct", # 89% approval rate
    # "microsoft/phi-3-mini-128k-instruct", # 88% approval rate

    "qwen/qwen-max",
    "qwen/qwen-plus",
    "qwen/qwen-turbo",

    "qwen/qwen3-32b:nothink",
    "qwen/qwen3-30b-a3b:nothink",

    "qwen/qwen-2.5-72b-instruct",
    "qwen/qwen-2.5-7b-instruct",
    "qwen/qwen-2-72b-instruct",

    "deepseek/deepseek-chat",
    "deepseek/deepseek-chat-v3-0324",
    # "deepseek/deepseek-prover-v2", # not run

    "google/gemma-3-27b-it",
    "google/gemma-3-12b-it",
    "google/gemma-3-4b-it",

    "google/gemma-2-27b-it",
    "google/gemma-2-9b-it",

    # "cohere/command-r", # rate limits
    # "cohere/command-r-plus",  # not run

    # "mistralai/mixtral-8x22b-instruct", # low approval rate
    "mistralai/mixtral-8x7b-instruct",
    "mistralai/mistral-7b-instruct-v0.3",
    "mistralai/mistral-7b-instruct-v0.1",

    "mistralai/mistral-large-2407",
    "mistralai/mistral-large",
    "mistralai/mistral-small",
    "mistralai/mistral-nemo",

    "amazon/nova-micro-v1",
    "amazon/nova-lite-v1",
    "amazon/nova-pro-v1",
]

# Judge models. The judge cells that follow index `judge_model_ids[0]`, so only
# the first active entry is used by them.
judge_model_ids = [
    # "gpt-4.1-mini-2025-04-14",
    # "gpt-4o-2024-08-06",
    "google/gemini-2.5-flash-preview-04-17",
    # "gemini-2.5-flash-preview-04-17",
]

# Directory handed to the runners as `results_dir`; the alternatives are kept
# for quick switching.
RESULTS_DIR = "data/20250507/all_model_runs/"
# RESULTS_DIR = "data/20250507/test/"
# RESULTS_DIR = "data/20250507/reasoning_model_runs/"
# RESULTS_DIR = "data/20250507/paraphrase_model_runs/"

In [None]:
exps = [
    "s1",
    "s2",
    "s3",
    "s4",
    "s5",
]
for decision_model_id in decision_model_ids:
    continue_to_next_model = False
    for exp in exps:
        dr = DilemmaRunner(
            model_id=decision_model_id,
            decision_run_name=exp,
            results_dir=RESULTS_DIR,
            override_decision_temperature=1.0,  # default 0.0
            # prompts_template='reasoning_after',
            # prompts_template='no_reasoning',
            choices_filename="choices_672.csv",
        )
        await dr.run(
            # limit=10,
            # disable_validation=True,
            try_retries=False
        )
        # dr.process()

        # valid_pct = (dr.data.raw_response.str.len() != 0).sum() / len(dr.data)
    #     if valid_pct < 0.85:
    #         print(f"Warning: {valid_pct:.2%} of responses for model {decision_model_id} are empty.")
    #         continue_to_next_model = True
    #     if continue_to_next_model:
    #         continue
    # if continue_to_next_model:
    #     print(f"Finished decision model {decision_model_id}.\n\n")
    #     continue
    print(f"Finished decision model {decision_model_id}.\n\n")

In [None]:
exps = [
    "s1",
    "s2",
    "s3",
    "s4",
    "s5",
]
for decision_model_id in decision_model_ids:
    for exp in exps:
        jr = JudgeRunner(
            decision_model_id=decision_model_id,
            decision_run_name=exp,
            judge_model_id=judge_model_ids[0],
            judge_run_name=exp,
            results_dir=RESULTS_DIR,
            judge_cot=True,
            override_judge_temperature=0.0,
        )
        await jr.run_rationales()

    print(f"Finished analyzing decision model {decision_model_id}.\n\n")

In [None]:
hf_model_ids = [
    # # # HuggingFace # # #
    "Qwen2.5-32B-Instruct",
    "Qwen2.5-14B-Instruct",
    "Qwen2.5-3B-Instruct",
    "Qwen2.5-1.5B-Instruct",

    "Qwen1.5-72B-Chat",
    "Qwen1.5-32B-Chat",
    "Qwen1.5-14B-Chat",
    "Qwen1.5-7B-Chat",
    "Qwen1.5-4B-Chat",

    # "google/gemma-3-1b-it", # not run
]

exps = [
    "s1",
    "s2",
    "s3",
    "s4",
    "s5",
]
for decision_model_id in hf_model_ids:
    for exp in exps:
        jr = JudgeRunner(
            decision_model_id=decision_model_id,
            decision_run_name=exp,
            judge_model_id=judge_model_ids[0],
            judge_run_name=exp,
            results_dir=RESULTS_DIR,
            judge_cot=True,
            override_judge_temperature=0.0,
        )
        await jr.run_rationales()

    print(f"Finished analyzing decision model {decision_model_id}.\n\n")

## Reasoning models

In [None]:
# # # Reasoning Models # # #
# Grouped by provider prefix; flattened below in declaration order.
reasoning_models_by_family = {
    "qwen": [
        "qwen/qwq-32b",
        "qwen/qwen3-32b:think",
        "qwen/qwen3-30b-a3b:think",
    ],
    "deepseek": [
        "deepseek/deepseek-r1",
        "deepseek/deepseek-r1-distill-llama-70b",
        "deepseek/deepseek-r1-distill-llama-8b",
    ],
}
reasoning_model_ids = [
    model_id
    for family_models in reasoning_models_by_family.values()
    for model_id in family_models
]

# Single active judge. Disabled alternatives: "gpt-4.1-mini-2025-04-14",
# "gpt-4o-2024-08-06", "gemini-2.5-flash-preview-04-17".
judge_model_ids = ["google/gemini-2.5-flash-preview-04-17"]

# Results root for this section; passed to the runners as `results_dir`.
RESULTS_DIR = "data/20250507/reasoning_model_runs/"

In [None]:
exps = [
    "s1",
    "s2",
    "s3",
    "s4",
    "s5",
]
for decision_model_id in reasoning_model_ids:
    for exp in exps:
        dr = DilemmaRunner(
            model_id=decision_model_id,
            decision_run_name=exp,
            results_dir=RESULTS_DIR,
            # override_decision_temperature=0.0,  # default
            # prompts_template='reasoning_after',
            prompts_template='no_reasoning',
            choices_filename="choices_672.csv",
        )
        await dr.run(
            # limit=10,
            # disable_validation=True,
            try_retries=False
        )
    print(f"Finished decision model {decision_model_id}.\n\n")

In [None]:
exps = [
    "s1",
    "s2",
    "s3",
    "s4",
    "s5",
]
for decision_model_id in reasoning_model_ids:
    for exp in exps:
        jr = JudgeRunner(
            decision_model_id=decision_model_id,
            decision_run_name=exp,
            judge_model_id=judge_model_ids[0],
            judge_run_name=exp,
            results_dir=RESULTS_DIR,
            judge_cot=True,
            override_judge_temperature=0.0,
        )
        await jr.run_rationales()

    print(f"Finished analyzing decision model {decision_model_id}.\n\n")

## SFT-DPO models

In [None]:
# Checkpoints for the SFT-DPO section, grouped by model family; each family
# keeps its original order and the groups are flattened in declaration order.
sftdpo_models_by_family = {
    "instella-3b": [
        "amd/Instella-3B-SFT",
        "amd/Instella-3B-Instruct",
    ],
    "olmo-2-32b": [
        "OLMo-2-0325-32B-SFT",
        "OLMo-2-0325-32B-DPO",
        "OLMo-2-0325-32B-Instruct",
    ],
    "olmo-2-13b": [
        "OLMo-2-1124-13B-SFT",
        "OLMo-2-1124-13B-DPO",
        "OLMo-2-1124-13B-Instruct",
    ],
    "tulu-3-70b": [
        "Llama-3.1-Tulu-3-70B-SFT",
        "Llama-3.1-Tulu-3-70B-DPO",
        "Llama-3.1-Tulu-3-70B",
    ],
}
sftdpo_model_ids = [
    model_id
    for family_models in sftdpo_models_by_family.values()
    for model_id in family_models
]

# Single active judge. Disabled alternatives: "gpt-4.1-mini-2025-04-14",
# "gpt-4o-2024-08-06", "gemini-2.5-flash-preview-04-17".
judge_model_ids = ["google/gemini-2.5-flash-preview-04-17"]

# Results root for this section; passed to the runners as `results_dir`.
RESULTS_DIR = "data/20250507/dpo_model_runs/"

In [None]:
# Run decision models on cluster

In [None]:
exps = [
    "s1",
    "s2",
    "s3",
    # "s4",
    # "s5",
]
for decision_model_id in sftdpo_model_ids:
    for exp in exps:
        jr = JudgeRunner(
            decision_model_id=decision_model_id,
            decision_run_name=exp,
            judge_model_id=judge_model_ids[0],
            judge_run_name=exp,
            results_dir=RESULTS_DIR,
            judge_cot=True,
            override_judge_temperature=0.0,
        )
        await jr.run_rationales()

    print(f"Finished analyzing decision model {decision_model_id}.\n\n")

In [None]:
# The `allenai/tulu-v2.5-dpo-13b-*` entries, in their original order.
tulu_v25_dpo_variants = [
    "allenai/tulu-v2.5-dpo-13b-uf-mean",
    "allenai/tulu-v2.5-dpo-13b-helpsteer",
    "allenai/tulu-v2.5-dpo-13b-shp2",
    "allenai/tulu-v2.5-dpo-13b-stackexchange",
    # "allenai/tulu-v2.5-dpo-13b-uf-overall", # not run
    "allenai/tulu-v2.5-dpo-13b-capybara",
    "allenai/tulu-v2.5-dpo-13b-prm-phase-2",
    "allenai/tulu-v2.5-dpo-13b-hh-rlhf",
    "allenai/tulu-v2.5-dpo-13b-nectar",
    # "allenai/tulu-v2.5-dpo-13b-chatbot-arena-2023", # not run
    "allenai/tulu-v2.5-dpo-13b-chatbot-arena-2024",
    "allenai/tulu-v2.5-dpo-13b-alpacafarm-human-pref",
    "allenai/tulu-v2.5-dpo-13b-alpacafarm-gpt4-pref",
    "allenai/tulu-v2.5-dpo-13b-argilla-orca-pairs",
]

# Full list for this section: `allenai/tulu-2-13b` first, then the variants.
preference_model_ids = ["allenai/tulu-2-13b", *tulu_v25_dpo_variants]

# Single active judge. Disabled alternatives: "gpt-4.1-mini-2025-04-14",
# "gpt-4o-2024-08-06", "gemini-2.5-flash-preview-04-17".
judge_model_ids = ["google/gemini-2.5-flash-preview-04-17"]

# Results root for this section; passed to the runners as `results_dir`.
RESULTS_DIR = "data/20250507/preference_runs/"

In [None]:
# Run decision models on cluster

In [None]:
exps = [
    "s1",
    "s2",
    "s3",
    # "s4",
    # "s5",
]
for decision_model_id in preference_model_ids:
    for exp in exps:
        jr = JudgeRunner(
            decision_model_id=decision_model_id,
            decision_run_name=exp,
            judge_model_id=judge_model_ids[0],
            judge_run_name=exp,
            results_dir=RESULTS_DIR,
            judge_cot=True,
            override_judge_temperature=0.0,
        )
        await jr.run_rationales()

    print(f"Finished analyzing decision model {decision_model_id}.\n\n")

## Paraphrase Consistency

In [None]:
# Models used for the paraphrase-consistency runs, grouped by provider.
# NOTE: this rebinds `decision_model_ids`, replacing the longer list assigned
# earlier in the notebook.
paraphrase_models_by_provider = {
    "openai": [
        "gpt-4o-2024-11-20",
        "gpt-4o-mini-2024-07-18",
        "gpt-3.5-turbo-0125",
    ],
    "google": [
        "google/gemini-pro-1.5",
        "google/gemini-flash-1.5",
    ],
    "meta-llama": [
        "meta-llama/llama-4-maverick",
        "meta-llama/llama-3.3-70b-instruct",
    ],
    "mistralai": [
        "mistralai/mistral-nemo",
        "mistralai/mistral-large",
    ],
}
decision_model_ids = [
    model_id
    for provider_models in paraphrase_models_by_provider.values()
    for model_id in provider_models
]

# Single active judge. Disabled alternatives: "gpt-4.1-mini-2025-04-14",
# "gpt-4o-2024-11-20", "gemini-2.5-flash-preview-04-17".
judge_model_ids = ["google/gemini-2.5-flash-preview-04-17"]

# Results root for this section; passed to the runners as `results_dir`.
RESULTS_DIR = "data/20250507/paraphrase_model_runs/"

In [None]:
exps = [
    "s1",
    "s2",
    "s3",
    # "s4",
    # "s5",
]
para_idxs = [1, 2, 3]
for decision_model_id in decision_model_ids:
    continue_to_next_model = False
    for exp in exps:
        for para_idx in para_idxs:
            dr = DilemmaRunner(
                model_id=decision_model_id,
                decision_run_name=f"{exp}-{para_idx}",
                results_dir=RESULTS_DIR,
                override_decision_temperature=1.0,  # default 0.0
                # prompts_template='reasoning_after',
                # prompts_template='no_reasoning',
                choices_filename="choices_672.csv",
                paraphrase_idx=para_idx,
            )
            await dr.run(
                # limit=10,
                # disable_validation=True,
                try_retries=False
            )
    print(f"Finished decision model {decision_model_id}.\n\n")

In [None]:
exps = [
    "s1",
    "s2",
    "s3",
    # "s4",
    # "s5",
]
para_idxs = [1, 2, 3]
for decision_model_id in decision_model_ids:
    for exp in exps:
        for para_idx in para_idxs:
            jr = JudgeRunner(
                decision_model_id=decision_model_id,
                decision_run_name=f"{exp}-{para_idx}",
                judge_model_id=judge_model_ids[0],
                judge_run_name=f"{exp}-{para_idx}",
                results_dir=RESULTS_DIR,
                judge_cot=True,
                override_judge_temperature=0.0,
            )
            await jr.run_rationales()

    print(f"Finished analyzing decision model {decision_model_id}.\n\n")

## Judge Model Consistency

In [None]:
# Decision models whose rationales are judged by several judge models,
# grouped by provider. NOTE: this rebinds `decision_model_ids`.
judge_study_models_by_provider = {
    "openai": [
        "gpt-4o-2024-11-20",
        "gpt-4o-mini-2024-07-18",
        "gpt-3.5-turbo-0125",
    ],
    "google": [
        "google/gemini-pro-1.5",
        "google/gemini-flash-1.5",
    ],
    "meta-llama": [
        "meta-llama/llama-4-maverick",
        "meta-llama/llama-3.3-70b-instruct",
    ],
    "mistralai": [
        "mistralai/mistral-nemo",
        "mistralai/mistral-large",
    ],
}
decision_model_ids = [
    model_id
    for provider_models in judge_study_models_by_provider.values()
    for model_id in provider_models
]

# Three judges are active in this section.
# Disabled alternative: "gemini-2.5-flash-preview-04-17".
judge_model_ids = [
    "gpt-4.1-mini-2025-04-14",
    "gpt-4o-2024-11-20",
    "google/gemini-2.5-flash-preview-04-17",
]

# Results root for this section; passed to the runners as `results_dir`.
RESULTS_DIR = "data/20250507/judge_model_runs/"

In [None]:
# To avoid running the decision models again, first copy the response files of
# the models listed above (all five samples) from
# `data/20250507/all_model_runs/responses` to `data/20250507/judge_model_runs/responses`.
# Otherwise, uncomment the loop below to query the decision models again.

# exps = [
#     "s1",
#     "s2",
#     "s3",
#     "s4",
#     "s5",
# ]
# for decision_model_id in decision_model_ids:
#     continue_to_next_model = False
#     for exp in exps:
#         for para_idx in para_idxs:
#             dr = DilemmaRunner(
#                 model_id=decision_model_id,
#                 decision_run_name=f"{exp}",
#                 results_dir=RESULTS_DIR,
#                 override_decision_temperature=1.0,  # default 0.0
#                 # prompts_template='reasoning_after',
#                 # prompts_template='no_reasoning',
#                 choices_filename="choices_672.csv",
#             )
#             await dr.run(
#                 # limit=10,
#                 # disable_validation=True,
#                 try_retries=False
#             )
#     print(f"Finished decision model {decision_model_id}.\n\n")

In [None]:
exps = [
    "s1",
    "s2",
    "s3",
    "s4",
    "s5",
]
for judge_model_id in judge_model_ids:
    for decision_model_id in decision_model_ids:
        for exp in exps:
            for para_idx in para_idxs:
                jr = JudgeRunner(
                    decision_model_id=decision_model_id,
                    decision_run_name=f"{exp}",
                    judge_model_id=judge_model_id,
                    judge_run_name=f"{exp}",
                    results_dir=RESULTS_DIR,
                    judge_cot=True,
                    override_judge_temperature=0.0,
                )
                await jr.run_rationales()

        print(f"Finished analyzing decision model {decision_model_id}.\n\n")

## Reasoning after

In [None]:
# Decision models for the "reasoning after" experiment.
# Every ID appears exactly once: the qwen3 ":nothink" pair used to be listed
# twice, which made the run loops visit those two models a second time.
after_model_ids = [
    # # # Standard Models # # #
    "gpt-4.1-2025-04-14",
    "gpt-4.1-mini-2025-04-14",
    "gpt-4.1-nano-2025-04-14",
    "gpt-4o-2024-11-20",
    "gpt-4o-2024-08-06",
    "gpt-4o-2024-05-13",
    "gpt-4o-mini-2024-07-18",
    "gpt-3.5-turbo-0125",
    "gpt-3.5-turbo-1106",

    "google/gemini-2.5-flash-preview-04-17",
    "google/gemini-2.0-flash-001",
    "google/gemini-2.0-flash-lite-001",
    "google/gemini-pro-1.5",
    "google/gemini-flash-1.5",
    "google/gemini-flash-1.5-8b",

    "meta-llama/llama-4-maverick",
    "meta-llama/llama-4-scout",
    "meta-llama/llama-3.3-70b-instruct",
    "meta-llama/llama-3.1-70b-instruct",
    "meta-llama/llama-3.1-8b-instruct",
    "meta-llama/llama-3-70b-instruct",
    "meta-llama/llama-3-8b-instruct",

    "anthropic/claude-3.5-sonnet-20240620:beta",
    "anthropic/claude-3.5-haiku:beta",

    "microsoft/phi-4",

    "qwen/qwen-max",
    "qwen/qwen-plus",
    "qwen/qwen-turbo",

    "qwen/qwen3-32b:nothink",
    "qwen/qwen3-30b-a3b:nothink",

    "qwen/qwen-2.5-72b-instruct",
    "qwen/qwen-2.5-7b-instruct",
    "qwen/qwen-2-72b-instruct",

    "deepseek/deepseek-chat",
    "deepseek/deepseek-chat-v3-0324",

    "google/gemma-3-27b-it",
    "google/gemma-3-12b-it",
    "google/gemma-3-4b-it",

    "google/gemma-2-27b-it",
    "google/gemma-2-9b-it",

    "mistralai/mixtral-8x7b-instruct",
    "mistralai/mistral-7b-instruct-v0.3",
    "mistralai/mistral-7b-instruct-v0.1",

    "mistralai/mistral-large-2407",
    "mistralai/mistral-large",
    "mistralai/mistral-small",
    "mistralai/mistral-nemo",

    "amazon/nova-micro-v1",
    "amazon/nova-lite-v1",
    "amazon/nova-pro-v1",
]

# Judge models. The judge cell for this section indexes `judge_model_ids[0]`,
# so only the first active entry is used.
judge_model_ids = [
    # "gpt-4.1-mini-2025-04-14",
    # "gpt-4o-2024-08-06",
    "google/gemini-2.5-flash-preview-04-17",
    # "gemini-2.5-flash-preview-04-17",
]

# Results root for this section; passed to the runners as `results_dir`.
RESULTS_DIR = "data/20250507/reasoning_after_runs/"

In [None]:
exps = [
    "after1",
    "after2",
    "after3",
    "after4",
    "after5",
]
for decision_model_id in after_model_ids:
    continue_to_next_model = False
    for exp in exps:
        dr = DilemmaRunner(
            model_id=decision_model_id,
            decision_run_name=exp,
            results_dir=RESULTS_DIR,
            override_decision_temperature=1.0,  # default 0.0
            prompts_template='reasoning_after',
            # prompts_template='no_reasoning',
            choices_filename="choices_672.csv",
        )
        await dr.run(
            # limit=10,
            # disable_validation=True,
            # overwrite=True,
            try_retries=False,
        )
        # dr.process()
    print(f"Finished decision model {decision_model_id}.\n\n")


In [None]:
exps = [
    "after1",
    "after2",
    "after3",
    "after4",
    "after5",
]
for decision_model_id in after_model_ids:
    for exp in exps:
        jr = JudgeRunner(
            decision_model_id=decision_model_id,
            decision_run_name=exp,
            judge_model_id=judge_model_ids[0],
            judge_run_name=exp,
            results_dir=RESULTS_DIR,
            judge_cot=True,
            override_judge_temperature=0.0,
        )
        await jr.run_rationales()

    print(f"Finished analyzing decision model {decision_model_id}.\n\n")