## Setup

In [1]:
import glob
import os
import matplotlib.pyplot as plt
from collections import Counter
import pandas as pd
import colorsys
from pathlib import Path

import numpy as np
from adjustText import adjust_text

from moral_lens.models import load_model_config


# Macro-level categories mapped to the micro-level labels that belong to each.
# Insertion order matters: it determines the order of TAXONOMY_MICRO below.
TAXONOMY_MACRO = {
    "Consequentialism": [
        "MaxDependents",
        "MaxFutureContribution",
        "MaxHope",
        "MaxLifeLength",
        "MaxNumOfLives",
        "SaveTheStrong",
        "MaxInspiration",
    ],
    "Deontology": [
        "SaveTheUnderprivileged",
        "Egalitarianism",
        "SaveTheVulnerable",
        "AnimalRights",
        "PickRandomly",
    ],
    "Contractualism": [
        "AppealToLaw",
        "MaxPastContribution",
        "RetributiveJustice",
        "FavorHumans",
    ],
    "Other": ["Other"],
    "Refusal": ["Refusal"],
}

# Flat list of every micro label, in macro-category order.
TAXONOMY_MICRO = [label for labels in TAXONOMY_MACRO.values() for label in labels]

[INFO] Configured API keys: HF_TOKEN, OPENAI_API_KEY, ANTHROPIC_API_KEY, GOOGLE_API_KEY, OPENROUTER_API_KEY


## Query models for results
- This step can be skipped if the results are already available elsewhere; just retrieve them instead of re-querying the models.

In [2]:
from moral_lens.dilemma import DilemmaRunner
from moral_lens.judge import JudgeRunner
from moral_lens.config import PathConfig
from moral_lens.utils import mydisplay

In [3]:
# Models to query in the cells below. Commented-out ids are alternatives that
# can be switched on by uncommenting them.
decision_model_ids = [
    # "openrouter/quasar-alpha",
    # "openrouter/optimus-alpha",
    # "gemini-2.0-flash-lite-001",
    "gemini-2.0-flash-001",
    "gpt-3.5-turbo-0125",
    # "gpt-4o-2024-08-06",
    "gpt-4o-mini-2024-07-18",
    # "o3-mini-2025-01-31:low",
    "meta-llama/llama-4-scout",
    # "meta-llama/llama-4-maverick",
    # "meta-llama/llama-3.1-8b-instruct",
    # "meta-llama/llama-3.3-70b-instruct",
    # "deepseek/deepseek-chat-v3-0324",
    # "qwen/qwq-32b",
    # "qwen/qwen-plus",
    # "microsoft/phi-4",
]

# Directory that holds this experiment's result files.
# Alternative experiment directory:
#   Path("moral_lens") / "experimental_data" / "decision_consistency"
path_config = PathConfig(
    results_dir=Path("moral_lens") / "experimental_data" / "intersectional_analysis",
)

Get 3 samples (runs `s1`, `s2`, `s3`) from each model at temperature=0.7.

In [4]:
for decision_model_id in decision_model_ids:
    for experiment in ["s1", "s2", "s3"]:
        dr = DilemmaRunner(
            model_id=decision_model_id,
            decision_run_name=experiment,
            path_config=path_config,
            choices_filename="choices_new.csv",
            override_decision_temperature=0.7,
        )
        await dr.run()
        dr.process()

[INFO] Gemini model gemini-2.0-flash-001 loaded.


Valid responses received: 100%|##########| 8/8 [00:01<00:00,  5.43it/s]


[INFO] Responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gemini-2.0-flash-001_s1.csv.
[INFO] Processed responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gemini-2.0-flash-001_s1.csv.
[INFO] Gemini model gemini-2.0-flash-001 loaded.


Valid responses received: 100%|##########| 8/8 [00:01<00:00,  6.61it/s]


[INFO] Responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gemini-2.0-flash-001_s2.csv.
[INFO] Processed responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gemini-2.0-flash-001_s2.csv.
[INFO] Gemini model gemini-2.0-flash-001 loaded.


Valid responses received: 100%|##########| 8/8 [00:00<00:00,  8.15it/s]


[INFO] Responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gemini-2.0-flash-001_s3.csv.
[INFO] Processed responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gemini-2.0-flash-001_s3.csv.
[INFO] OpenAI model gpt-3.5-turbo-0125 loaded.


Valid responses received:  88%|########7 | 7/8 [00:10<00:01,  1.46s/it]


[INFO] Responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gpt-3.5-turbo-0125_s1.csv.
[INFO] Processed responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gpt-3.5-turbo-0125_s1.csv.
[INFO] OpenAI model gpt-3.5-turbo-0125 loaded.


Valid responses received:  88%|########7 | 7/8 [00:10<00:01,  1.55s/it]


[INFO] Responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gpt-3.5-turbo-0125_s2.csv.
[INFO] Processed responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gpt-3.5-turbo-0125_s2.csv.
[INFO] OpenAI model gpt-3.5-turbo-0125 loaded.


Valid responses received:  88%|########7 | 7/8 [00:08<00:01,  1.22s/it]


[INFO] Responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gpt-3.5-turbo-0125_s3.csv.
[INFO] Processed responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gpt-3.5-turbo-0125_s3.csv.
[INFO] OpenAI model gpt-4o-mini-2024-07-18 loaded.


Valid responses received: 100%|##########| 8/8 [00:05<00:00,  1.58it/s]


[INFO] Responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gpt-4o-mini-2024-07-18_s1.csv.
[INFO] Processed responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gpt-4o-mini-2024-07-18_s1.csv.
[INFO] OpenAI model gpt-4o-mini-2024-07-18 loaded.


Valid responses received: 100%|##########| 8/8 [00:05<00:00,  1.47it/s]


[INFO] Responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gpt-4o-mini-2024-07-18_s2.csv.
[INFO] Processed responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gpt-4o-mini-2024-07-18_s2.csv.
[INFO] OpenAI model gpt-4o-mini-2024-07-18 loaded.


Valid responses received: 100%|##########| 8/8 [00:05<00:00,  1.48it/s]


[INFO] Responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gpt-4o-mini-2024-07-18_s3.csv.
[INFO] Processed responses saved to moral_lens/experimental_data/intersectional_analysis/responses/gpt-4o-mini-2024-07-18_s3.csv.
[INFO] OpenRouter model meta-llama/llama-4-scout loaded.


Valid responses received: 100%|##########| 8/8 [00:02<00:00,  3.12it/s]


[INFO] Responses saved to moral_lens/experimental_data/intersectional_analysis/responses/llama-4-scout_s1.csv.
[INFO] Processed responses saved to moral_lens/experimental_data/intersectional_analysis/responses/llama-4-scout_s1.csv.
[INFO] OpenRouter model meta-llama/llama-4-scout loaded.


Valid responses received: 100%|##########| 8/8 [00:01<00:00,  4.98it/s]


[INFO] Responses saved to moral_lens/experimental_data/intersectional_analysis/responses/llama-4-scout_s2.csv.
[INFO] Processed responses saved to moral_lens/experimental_data/intersectional_analysis/responses/llama-4-scout_s2.csv.
[INFO] OpenRouter model meta-llama/llama-4-scout loaded.


Valid responses received: 100%|##########| 8/8 [00:02<00:00,  3.36it/s]

[INFO] Responses saved to moral_lens/experimental_data/intersectional_analysis/responses/llama-4-scout_s3.csv.
[INFO] Processed responses saved to moral_lens/experimental_data/intersectional_analysis/responses/llama-4-scout_s3.csv.





In [4]:
# Process-only pass over runs "s1", "s2", "s3" of each selected model: no model
# is queried here. To re-query first, call `await dr.run()` before
# `dr.process()` (and pass override_decision_temperature=0.7 to the runner).
# NOTE(review): unlike the sampling cell, no choices_filename is passed here --
# confirm the default is the intended one for this results directory.
for decision_model_id, experiment in (
    (model_id, run_name)
    for model_id in decision_model_ids
    for run_name in ("s1", "s2", "s3")
):
    dr = DilemmaRunner(
        path_config=path_config,
        model_id=decision_model_id,
        decision_run_name=experiment,
    )
    dr.process()

Output file already exists at moral_lens/experimental_data/decision_consistency/responses/phi-4_s1.csv. Use `overwrite=True` in .run() to overwrite it.
[INFO] Processed responses saved to moral_lens/experimental_data/decision_consistency/responses/phi-4_s1.csv.
Output file already exists at moral_lens/experimental_data/decision_consistency/responses/phi-4_s2.csv. Use `overwrite=True` in .run() to overwrite it.
[INFO] Processed responses saved to moral_lens/experimental_data/decision_consistency/responses/phi-4_s2.csv.
Output file already exists at moral_lens/experimental_data/decision_consistency/responses/phi-4_s3.csv. Use `overwrite=True` in .run() to overwrite it.
[INFO] Processed responses saved to moral_lens/experimental_data/decision_consistency/responses/phi-4_s3.csv.


## Handle invalid responses

In [17]:
# Rerun if needed
for decision_model_id in decision_model_ids:
    for experiment in ["s1", "s2", "s3"]:
        dr = DilemmaRunner(
            model_id=decision_model_id,
            decision_run_name=experiment,
            path_config=path_config,
            override_decision_temperature=0.7,
        )
        dr.load_data()
        rows_to_rerun = dr.data[dr.data.decision.str.len() == 0].index.to_list()
        await dr.rerun_for_indices(rows_to_rerun)

Output file already exists at moral_lens/experimental_data/decision_consistency/responses/llama-4-maverick_s1.csv. Use `overwrite=True` in .run() to overwrite it.
[INFO] OpenRouter model meta-llama/llama-4-maverick loaded.


Valid responses received:  25%|##5       | 1/4 [00:11<00:33, 11.20s/it]


[INFO] Responses updated in moral_lens/experimental_data/decision_consistency/responses/llama-4-maverick_s1.csv.
Output file already exists at moral_lens/experimental_data/decision_consistency/responses/llama-4-maverick_s2.csv. Use `overwrite=True` in .run() to overwrite it.
[INFO] OpenRouter model meta-llama/llama-4-maverick loaded.


Valid responses received:   0%|          | 0/3 [00:11<?, ?it/s]


[INFO] Responses updated in moral_lens/experimental_data/decision_consistency/responses/llama-4-maverick_s2.csv.
Output file already exists at moral_lens/experimental_data/decision_consistency/responses/llama-4-maverick_s3.csv. Use `overwrite=True` in .run() to overwrite it.
[INFO] OpenRouter model meta-llama/llama-4-maverick loaded.


Valid responses received:   0%|          | 0/3 [00:12<?, ?it/s]

[INFO] Responses updated in moral_lens/experimental_data/decision_consistency/responses/llama-4-maverick_s3.csv.



