# Summary

The notebook evaluates different LLMs on moral foundation classification using Haidt's Moral Foundations Theory. 

It tests each model's ability to identify five moral foundations (care/harm, fairness/cheating, loyalty/betrayal, authority/subversion, sanctity/degradation) in text samples and compares performance using per-foundation classification metrics (precision, recall, F1) logged to Weights & Biases.

Run through `Papermill` with custom parameters:
```bash
papermill ask_llm.ipynb output.ipynb -p model_name "llama3_8b" -p test_data 'morality-MFRC' -p sample 10 -p temperature 0.3 --log-output
```

In [None]:
import json
import pandas as pd
from typing import List

import replicate
from datasets import Dataset, load_dataset
from dotenv import load_dotenv
from pydantic import BaseModel
from tqdm.notebook import tqdm

load_dotenv()

# Config

Model selection, dataset parameters, temperature settings, and classification prompt.

In [2]:
# Parameters
# Notebook-level settings; the summary at the top shows how to override them with Papermill `-p` flags.

model_name = 'qwen3-235b' # key of CALL_FUNCTIONS: llama3_8b, llama4_maverick, deepseek-v3, claude-4-sonnet, gpt-5-nano, gpt-5-mini, qwen3-235b
demo = 'false'  # string flag; recorded in the W&B run config
sample = 100  # number of texts to evaluate; values <= 0 evaluate the whole dataset
test_data = 'morality-eMFD' # morality-MFRC, morality-MFTC, morality-eMFD
temperature = 0.3  # sampling temperature; recorded in the W&B run config
random_state = 42  # seed for the random sample selection
use_async = True  # True: run requests concurrently with asyncio; False: run them one by one
# Instruction text sent with every classification request. The answer parser further
# down reads the five true/false values in the order they are listed here.
# NOTE(review): the example object ends with a trailing comma, which is not strictly
# valid JSON even though the prompt asks for valid JSON. Changing the literal changes
# what the models see, so confirm before editing it.
prompt = """You are an expert in moral psychology, classifying text according to Haidt's theory.
            For each moral foundation, mark true if moral values from that foundation are expressed in the text, false if not expressed.
                                                   
            Answer only with a valid JSON in this format:
            {
                "care/harm": [true / false],
                "fairness/cheating": [true / false],
                "loyalty/betrayal": [true / false],
                "authority/subversion": [true / false],
                "sanctity/degradation": [true / false],
            }
            """
            #             Provide an explanation.
            
            # Haidt's foundations are:
            #     "care/harm": This foundation is related to our long evolution as mammals with attachment systems and an ability to feel (and dislike) the pain of others. It underlies virtues of kindness, gentleness, and nurturance.
            #     "fairness/cheating": This foundation is related to the evolutionary process of reciprocal altruism. It generates ideas of justice, rights, and autonomy.
            #     "loyalty/betrayal":  This foundation is related to our long history as tribal creatures able to form shifting coalitions. It underlies virtues of patriotism and self-sacrifice for the group. It is active anytime people feel that it’s “one for all, and all for one.”
            #     "authority/subversion": This foundation was shaped by our long primate history of hierarchical social interactions. It underlies virtues of leadership and followership including deference to legitimate authority and respect for traditions.
            #     "sanctity/degradation": This foundation was shaped by the psychology of disgust and contamination. It underlies religious notions of striving to live in an elevated, less carnal, more noble way. It underlies the widespread idea that the body is a temple that can be desecrated by immoral activities and contaminants (an idea not unique to religious traditions).


#                 "reasoning": [justification]
# papermill ask_llm.ipynb output.ipynb -p model_name "llama3_8b" -p test_data 'morality-MFRC' -p sample 10 -p temperature 0.7 --log-output

In [None]:
import wandb

# Start the W&B run first, then attach every notebook parameter to it so each
# logged table can be traced back to the exact settings that produced it.
wandb.init(project='morality-llm')
run_settings = {
    'model_name': model_name,
    'demo': demo,
    'sample': sample,
    'test_data': test_data,
    'temperature': temperature,
    'random_state': random_state,
    'prompt': prompt,
}
wandb.config.update(run_settings)

# RESUME
# import wandb
# wandb.init(project='morality-llm', id="1f1usujv", resume="must")
# run = wandb.Api().run("morality-llm/1f1usujv")
# for key, value in run.config.items():
#    globals()[key] = value

# LLMs

API integrations for various language models (Llama, DeepSeek, Claude, OpenAI, Qwen) with async processing capabilities

In [4]:
import asyncio
from pprint import pprint

def analyze_with_llama3_8b(text: str, temperature=temperature):
    """Classify `text` with Meta Llama 3 8B Instruct through Replicate.

    The module-level `prompt` is sent as the system prompt and the text is
    wrapped as `Text: "<text>"`.

    Args:
        text: Passage to classify.
        temperature: Accepted for signature parity with the other analyzers;
            it is not forwarded to the model.

    Returns:
        The raw model output joined into one string, with surrounding
        whitespace stripped. The output is not parsed here.
    """
    request = {
        "system_prompt": prompt,
        "prompt": f'Text: "{text}"',
        "max_new_tokens": 200,
        # "temperature" is intentionally left out of the request.
        "return_full_text": False,
    }
    chunks = replicate.run("meta/meta-llama-3-8b-instruct", input=request)
    return "".join(chunks).strip()


def analyze_with_llama4_maverick(text: str, temperature=temperature):
    """Classify `text` with Llama 4 Maverick Instruct through Replicate.

    No separate system prompt is sent. The module-level `prompt`, a request
    for a brief reasoning summary and the quoted text are combined into one
    user prompt.

    Args:
        text: Passage to classify.
        temperature: Sampling temperature forwarded to the model.

    Returns:
        The raw model output joined into one string and stripped.
    """
    request = {
        "prompt": f'{prompt}\n Summarize reasoning after briefly. \n Text: "{text}"',
        "max_tokens": 100,
        "temperature": temperature,
        "return_full_text": False,
    }
    chunks = replicate.run("meta/llama-4-maverick-instruct", input=request)
    return "".join(chunks).strip()


async def analyze_with_llama4_maverick_async(text: str, temperature=0.3):
    """Awaitable variant of `analyze_with_llama4_maverick`.

    The blocking call is run in a worker thread so that several requests can
    be in flight at once.

    Args:
        text: Passage to classify.
        temperature: Sampling temperature forwarded to the blocking analyzer.

    Returns:
        Whatever `analyze_with_llama4_maverick` returns for these arguments.
    """
    return await asyncio.to_thread(
        analyze_with_llama4_maverick,
        text=text,
        temperature=temperature,
    )


# Usage
texts = [
   # "We must protect our children from harm",
   # "Everyone deserves equal pay for equal work",
   # "Stand with our troops who sacrifice for freedom",
   # "Respect your elders and traditional values",
   # "Marriage is sacred and must be protected",
   # "The police officer arrested a peacefully protesting young woman",
   # "My cat likes eating raw beef meet.",
   # "Anyone think Macron should dispose of the Algerian dictator? France did little to support the uprsings of 2010-2012."
]

# DEMO
for text in texts:
   try:
       result = await analyze_with_llama4_maverick_async(text)
       pprint(f"\nText: {text}")
       pprint(f"Moral content: {result}")
      #  print(f"Moral content: {result.overall_moral_content}")
      #  for foundation in result.foundations:
      #      print(f"  {foundation.foundation}")
      #    #   print(f"  {foundation.foundation}: - {foundation.explanation}")
   except Exception as e:
       pprint(f"Error: '{text}' → {e}")
       


In [5]:
import asyncio

def analyze_with_deepseek(text: str, temperature=temperature):
    """Classify `text` with DeepSeek-V3 through Replicate.

    The module-level `prompt` and the text are sent as one combined prompt.

    Args:
        text: Passage to classify.
        temperature: Sampling temperature forwarded to the model.

    Returns:
        The raw model output joined into one string and stripped.
    """
    request = {
        "prompt": f"{prompt}\nText: {text}.",
        "max_new_tokens": 200,
        "temperature": temperature,
    }
    chunks = replicate.run("deepseek-ai/deepseek-v3", input=request)
    return "".join(chunks).strip()

async def analyze_with_deepseek_async(text: str, temperature=0.3):
    """Awaitable DeepSeek-V3 classification through Replicate.

    Sends the same request as `analyze_with_deepseek`, but runs the blocking
    `replicate.run` call in a worker thread.

    Args:
        text: Passage to classify.
        temperature: Sampling temperature forwarded to the model.

    Returns:
        The raw model output joined into one string. Unlike the blocking
        variant, the result is not stripped.
    """
    request = {
        "prompt": f"{prompt}\nText: {text}.",
        "max_new_tokens": 200,
        "temperature": temperature,
    }
    chunks = await asyncio.to_thread(
        replicate.run,
        "deepseek-ai/deepseek-v3",
        input=request,
    )
    return ''.join(chunks)

# Sample sentence for trying the DeepSeek analyzers by hand. Both calls at the
# bottom are commented out, so this cell only assigns `text` and sends no request.
text = "Anyone think Macron should dispose of the Algerian dictator? France did little to support the uprsings of 2010-2012."
# text = "The CEO received a $10 million bonus while laying off workers."
# text = "My cat eats raw meet."
# text = "The marrige should be protected"
# analyze_with_deepseek(text)
# await analyze_with_deepseek_async(text)

In [None]:
import anthropic

def analyze_with_claude(text: str, temperature=0.7):
    """Classify `text` with Claude through the blocking Anthropic client.

    The system prompt is the module-level `prompt` followed by a request for
    brief reasoning. A new client is created on every call.

    Args:
        text: Passage to classify, sent as the single user message.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The text of the first content block of the reply, stripped.
    """
    reply = anthropic.Anthropic().messages.create(
        model="claude-4-sonnet-20250514",
        max_tokens=250,
        temperature=temperature,
        system=prompt + "+ brief reasoning",
        messages=[{"role": "user", "content": f"Text: {text}"}],
    )
    return reply.content[0].text.strip()

import anthropic
from tqdm.asyncio import tqdm as tqdm_async

async def analyze_with_claude_async(text: str,  temperature=0.7):
    """Classify `text` with Claude through the asynchronous Anthropic client.

    The module-level `prompt` is sent as a single system text block marked
    with ephemeral `cache_control`. Unlike the blocking variant, no
    "brief reasoning" suffix is appended.

    Args:
        text: Passage to classify, sent as the single user message.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The text of the first content block of the reply, stripped.
    """
    system_blocks = [
        {"type": "text", "text": prompt, "cache_control": {"type": "ephemeral"}}
    ]
    user_turn = {"role": "user", "content": f"Text: {text}"}
    reply = await anthropic.AsyncAnthropic().messages.create(
        model="claude-4-sonnet-20250514",
        max_tokens=250,
        temperature=temperature,
        system=system_blocks,
        messages=[user_turn],
    )
    return reply.content[0].text.strip()

# Usage
text = "Anyone think Macron should dispose of the Algerian dictator? France did little to support the uprsings of 2010-2012."
# text = "Immigrants make up 15% of the workforce in this sector."
# text = "The CEO received a $10 million bonus while laying off workers."
# text = "The church collects $50,000 weekly in donations."

# Smoke test against the live Anthropic API. It only runs when the `demo`
# notebook parameter is true, so evaluating another model does not need an
# Anthropic key or spend a request. `str()` accepts both the string 'true'
# and a real boolean passed by Papermill.
if str(demo).strip().lower() == 'true':
    response = analyze_with_claude(text)
    print(response)
else:
    response = None  # keep the name defined for any later cell that reads it

In [None]:
from openai import AzureOpenAI
import asyncio

def analyze_with_openai(text: str, temperature=0.3, model_name="gpt-5-nano"):
    """Classify `text` with an Azure OpenAI chat deployment.

    The module-level `prompt` is the system message and `Text: <text>` is the
    user message. A new client is created on every call.

    Args:
        text: Passage to classify.
        temperature: Accepted for signature parity with the other analyzers;
            it is not forwarded to the API.
        model_name: Value passed as `model` to the chat completion request.

    Returns:
        The message content of the first returned choice.
    """
    client = AzureOpenAI(
        api_version="2024-02-15-preview",
        azure_endpoint="https://murmur.openai.azure.com/",
    )
    conversation = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": f'Text: {text}'},
    ]
    completion = client.chat.completions.create(
        model=model_name,
        messages=conversation,
    )
    return completion.choices[0].message.content


async def analyze_with_openai_async(text: str, temperature=0.3, model_name="gpt-5-nano"):
    """Awaitable variant of `analyze_with_openai`.

    The blocking call is run in a worker thread. The arguments are forwarded
    individually. Bundling them into one dict would hand that dict to
    `analyze_with_openai` as its `text` argument and silently fall back to
    the default `model_name` and `temperature`.

    Args:
        text: Passage to classify.
        temperature: Forwarded to `analyze_with_openai`.
        model_name: Forwarded to `analyze_with_openai`.

    Returns:
        Whatever `analyze_with_openai` returns for these arguments.
    """
    return await asyncio.to_thread(
        analyze_with_openai,
        text,
        temperature=temperature,
        model_name=model_name,
    )


text = "Anyone think Macron should dispose of the Algerian dictator? France did little to support the uprsings of 2010-2012."
# text = "The CEO received a $10 million bonus while laying off workers."
response = await analyze_with_openai_async(text)
print(response)

In [None]:
def analyze_with_qwen3235b(text: str, temperature=0.3):
    """Classify `text` with Qwen3-235B Instruct through Replicate.

    No separate system prompt is sent. The module-level `prompt`, a request
    for a brief reasoning summary and the text are combined into one prompt.

    Args:
        text: Passage to classify.
        temperature: Sampling temperature forwarded to the model.

    Returns:
        The raw model output joined into one string. The result is not
        stripped.
    """
    request = {
        "prompt": f"{prompt}\n Summarize reasoning after briefly. \ntext: {text}",
        "max_tokens": 150,
        "temperature": temperature,
        "return_full_text": False,
    }
    chunks = replicate.run("qwen/qwen3-235b-a22b-instruct-2507", input=request)
    return ''.join(chunks)

async def analyze_with_qwen3235b_async(text: str, temperature=0.3):
    """Awaitable variant of `analyze_with_qwen3235b`.

    The blocking call is run in a worker thread. The arguments are forwarded
    individually. Bundling them into one dict would hand that dict to
    `analyze_with_qwen3235b` as its `text` argument, so the model would
    receive the dict's repr and the requested temperature would be ignored.

    Args:
        text: Passage to classify.
        temperature: Forwarded to `analyze_with_qwen3235b`.

    Returns:
        Whatever `analyze_with_qwen3235b` returns for these arguments.
    """
    return await asyncio.to_thread(
        analyze_with_qwen3235b,
        text,
        temperature=temperature,
    )


text = "Anyone think Macron should dispose of the Algerian dictator? France did little to support the uprsings of 2010-2012."
# text = "The CEO received a $10 million bonus while laying off workers."
# response = await analyze_with_qwen3235b_async(text)
print(response)

# Data

Dataset loading, preprocessing, and sample selection for moral foundation testing

In [None]:
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import numpy as np

ds_test = load_dataset(f"maciejskorski/{test_data}")['train']

# Collapse the raw rows to one row per text: all labels recorded for a
# `text_id` are joined with ';' and the first text for that id is kept.
# The frame is converted once and both aggregations reuse the same grouping.
raw_by_text = ds_test.to_pandas().groupby('text_id')
labels = raw_by_text['label'].apply(';'.join)
texts = raw_by_text['text'].first()

ds_test = Dataset.from_dict({'id': labels.index, 'text':texts, 'label': labels})

# Draw a reproducible random subset of `sample` rows. `train_test_split`
# needs a non-empty remainder, so a request for all rows or more (and any
# `sample <= 0`) evaluates the whole dataset instead of raising.
if 0 < sample < ds_test.num_rows:
    _,test_idxs = train_test_split(range(ds_test.num_rows), test_size=sample, shuffle=True, random_state=random_state)
else:
    test_idxs = np.arange(ds_test.num_rows)

ds_test_sample = ds_test.select(test_idxs)
# Preview at most five rows; a smaller selection is shown in full.
display(ds_test_sample.select(range(min(5, ds_test_sample.num_rows))).to_pandas())

# Processing

Batch processing pipeline with error handling and concurrent request management for LLM inference

In [None]:
import functools


def _in_thread(sync_fn):
    """Return an awaitable wrapper that runs the blocking `sync_fn` in a worker thread."""
    @functools.wraps(sync_fn)
    async def _runner(text, **kwargs):
        return await asyncio.to_thread(sync_fn, text, **kwargs)
    return _runner


# model_name -> (blocking analyzer, awaitable analyzer, extra keyword arguments).
# Every model has both variants, so either `use_async` setting resolves to a
# callable of the right kind.
_ANALYZERS = {
    # Llama 3 has no dedicated async function, so the blocking one is wrapped.
    'llama3_8b': (analyze_with_llama3_8b, _in_thread(analyze_with_llama3_8b), {}),
    'llama4_maverick': (analyze_with_llama4_maverick, analyze_with_llama4_maverick_async, {}),
    'deepseek-v3': (analyze_with_deepseek, analyze_with_deepseek_async, {}),
    'claude-4-sonnet': (analyze_with_claude, analyze_with_claude_async, {}),
    # Both GPT entries share one function, so the deployment must be passed
    # explicitly or 'gpt-5-mini' would run the function's default model.
    # NOTE(review): assumes the Azure deployment names equal these keys - confirm.
    'gpt-5-nano': (analyze_with_openai, analyze_with_openai_async, {'model_name': 'gpt-5-nano'}),
    'gpt-5-mini': (analyze_with_openai, analyze_with_openai_async, {'model_name': 'gpt-5-mini'}),
    'qwen3-235b': (analyze_with_qwen3235b, analyze_with_qwen3235b_async, {}),
}

# Bind the configured temperature to every analyzer. Without this each
# function falls back to its own default, which can differ from the
# `temperature` recorded in the run config.
CALL_FUNCTIONS = {
    name: functools.partial(
        async_fn if use_async else sync_fn,
        temperature=temperature,
        **extra,
    )
    for name, (sync_fn, async_fn, extra) in _ANALYZERS.items()
}

# Fail before any request is sent if the model name is not recognised.
if model_name not in CALL_FUNCTIONS:
    raise ValueError(
        f"Unknown model_name {model_name!r}; expected one of {sorted(CALL_FUNCTIONS)}"
    )

analyze_with_llm = CALL_FUNCTIONS[model_name]

In [None]:
import sys
import asyncio

semaphore = asyncio.Semaphore(10)  # Max concurrent requests

async def process_async(example):
    """Classify one dataset row without letting a failed request abort the run.

    Args:
        example: Mapping with at least the keys 'id' and 'text'.

    Returns:
        `{"index": id, "predictions": response}` on success, or
        `{"index": id, "predictions": "", "error": message}` when the request
        raised.

    Raises:
        KeyError: If `example` has no 'id'.
    """
    # Read the id before entering the try block, so the error record below can
    # always refer to it.
    idx = example['id']
    async with semaphore:
        try:
            response = await analyze_with_llm(example['text'])
            out = {"index": idx, "predictions": response}
        except Exception as e:
            # Best effort: record the failure for this row and keep going.
            out = {"index": idx, "predictions": "", "error": str(e)}
        return out

async def process_all_async():
    """Classify every row of `ds_test_sample` concurrently.

    Each finished result is appended as one JSON line to
    `results_<wandb run id>.jsonl`.

    Returns:
        The result dicts, in the order the requests completed.
    """
    pending = [process_async(row) for row in ds_test_sample]
    collected = []
    log_path = f'results_{wandb.run.id}.jsonl'
    with open(log_path, 'w') as sink:
        for finished in tqdm_async.as_completed(pending, file=sys.stdout):
            record = await finished
            collected.append(record)
            sink.write(json.dumps(record) + '\n')
    return collected

def process(example):
    """Classify one dataset row with the blocking analyzer.

    Args:
        example: Mapping with at least the keys 'id' and 'text'.

    Returns:
        `{"index": id, "predictions": response}`. Exceptions raised by the
        analyzer are not caught here.
    """
    row_id = example['id']
    return {"index": row_id, "predictions": analyze_with_llm(example['text'])}

def process_all():
    """Classify every row of `ds_test_sample` one after another.

    Returns:
        The result dicts in dataset order.
    """
    lazy_results = map(process, ds_test_sample)
    progress = tqdm(lazy_results, file=sys.stdout, total=len(ds_test_sample))
    return list(progress)

if use_async:
    results = await process_all_async()
else:
    results = process_all()

In [None]:
# Flatten the result dicts into a table and attach the gold label to each row.
# Results may arrive in completion order, so labels are looked up by text id
# and not assigned by position.
df_results = pd.json_normalize(results)
label_by_id = pd.Series(ds_test_sample['label'], index=ds_test_sample['id'])
df_results['label'] = label_by_id.loc[df_results['index']].values
df_results.head()

In [26]:
# Upload the raw model responses together with their gold labels as a W&B table.
wandb.log({"predictions": wandb.Table(dataframe=df_results)})

In [28]:
import re

# Short names of the five foundations, in the order the prompt lists them.
moral_targets = ['care', 'fairness', 'loyalty', 'authority', 'sanctity']
# A colon followed by true/false in any letter case, optionally wrapped in
# square brackets as in the prompt's example (`"care/harm": [true]`).
pattern = re.compile(r':\s*\[?(true|false)\]?', re.IGNORECASE)

def extract_booleans(text):
    """Pull the true/false flags out of a raw model response.

    Flags are read in order of appearance using `pattern`, so bare (`: true`)
    and bracketed (`: [true]`) values in any letter case are recognised.

    Args:
        text: Raw model response. Anything that is not a string yields an
            empty list.

    Returns:
        At most `len(moral_targets)` booleans, in the order found.
    """
    if not isinstance(text, str):
        return []
    found = pattern.findall(text)[:len(moral_targets)]
    return [flag.lower() == 'true' for flag in found]

def _flags_from_matches(matches):
    """Convert the regex captures of one response into one boolean per moral target."""
    if not isinstance(matches, list):
        # A non-string prediction makes `.str.findall` return NaN, not a list.
        matches = []
    # `pattern` matches case-insensitively, so the captured text must be
    # lowercased before comparing; otherwise "True" would count as False.
    flags = [m.lower() == 'true' for m in matches[:len(moral_targets)]]
    # A response with fewer flags than targets (e.g. a failed or truncated
    # request) is padded with False, so every row has the same width.
    return flags + [False] * (len(moral_targets) - len(flags))

bool_lists = df_results['predictions'].str.findall(pattern).apply(_flags_from_matches)

y_pred = pd.DataFrame(bool_lists.tolist(), columns=moral_targets).astype(bool)

In [None]:
from sklearn.metrics import classification_report

out = df_results
# One boolean column per foundation: True when the gold label string of a row
# contains that foundation's name.
y_true = pd.DataFrame(
    {target: out['label'].str.contains(target).to_numpy() for target in moral_targets},
    columns=moral_targets,
)

# Per-foundation and averaged scores, transposed to one row per target.
report = pd.DataFrame(
    classification_report(
        y_true,
        y_pred,
        target_names=moral_targets,
        output_dict=True,
        zero_division=0,
    )
).T
report = report.rename_axis("target")
report

In [30]:
# Upload the classification report as a W&B table. The key is "accuracy", but
# the table holds every column of `report`, with the target name as a column.
wandb.log({"accuracy": wandb.Table(dataframe=report.reset_index())})

In [None]:
# Close the W&B run now that both tables have been logged.
wandb.finish()

In [19]:
# import wandb
# import pandas as pd
# api = wandb.Api()
# run_id = "ftz7ti3p"
# project_name = "morality-llm"
# # run = api.run(f"{project_name}/{run_id}")
# artifact = api.artifact(f"{project_name}/run-{run_id}-accuracy:latest")

# # modify the artifact, here add an index
# table = artifact.get("accuracy")
# table = pd.DataFrame(table.data,columns=table.columns)
# table.index = ['care', 'fairness', 'loyalty', 'authority', 'sanctity', 'micro avg', 'macro avg', 'weighted avg', 'samples avg']
# table.index.name = "target"

# # upload it
# with wandb.init(project=project_name, id=run_id, resume="allow"):
#     wandb.log({"accuracy": wandb.Table(dataframe=table.reset_index())})