## imports

In [None]:
import pandas as pd
import json
import re
from tqdm import tqdm

In [None]:
# for file parsing

from bs4 import BeautifulSoup
import os
import glob

## install llm and .env

In [None]:
pip install anthropic

Collecting anthropic
  Downloading anthropic-0.64.0-py3-none-any.whl.metadata (27 kB)
Downloading anthropic-0.64.0-py3-none-any.whl (297 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.2/297.2 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: anthropic
Successfully installed anthropic-0.64.0


In [None]:
!pip install python-dotenv



In [None]:
import anthropic
import json
import time

In [None]:
from dotenv import load_dotenv

In [None]:
load_dotenv()

True

In [None]:
# API client shared by every request below; the key is read from the process
# environment, which load_dotenv() has just populated from the .env file.
client = anthropic.Anthropic(
    api_key=os.getenv('ANTHROPIC_API_KEY'),
)

## file parsing

In [None]:
def process_thread(xml_content: str, max_comments=None):
    """Render one thread's XML as the compact tag strings sent to the model.

    Args:
        xml_content: Raw XML text of a single thread. It must contain a
            <submission> element with <original_poster> and <original_post>
            children.
        max_comments: Optional cap on how many <comment> elements (taken in
            document order) are rendered. ``None`` (the default) renders all
            of them, which is the historical behaviour.

    Returns:
        A ``(original_post, comments)`` tuple of strings. ``original_post`` is
        a single ``<original_post user="...">`` element; ``comments`` holds one
        ``<comment idx=N user="...">`` element per line, numbered from 1.

    Raises:
        ValueError: if the submission, the original poster or the original
            post element is missing from the XML.
    """
    soup = BeautifulSoup(xml_content, features="xml")
    submission = soup.find('submission')
    if submission is None:
        raise ValueError("thread XML has no <submission> element")

    poster_elem = submission.find('original_poster')
    post_elem = submission.find('original_post')
    if poster_elem is None or post_elem is None:
        raise ValueError(
            "<submission> must contain <original_poster> and <original_post>"
        )

    op_user = poster_elem.text.strip()
    op_text = post_elem.text.strip()
    original_post = f'<original_post user="{op_user}">{op_text}</original_post>'

    comment_elems = soup.find_all('comment')
    if max_comments is not None:
        comment_elems = comment_elems[:max_comments]

    rendered = []
    for idx, comment in enumerate(comment_elems, start=1):
        user = comment.get('user', 'unknown')
        text_elem = comment.find('text')
        # A comment without a <text> child is kept, with an empty body.
        text = text_elem.text.strip() if text_elem is not None else ""
        rendered.append(f'<comment idx={idx} user="{user}">{text}</comment>\n')

    return original_post, "".join(rendered)

## Batch requests for batch-processing (i.e. creating a set of API calls to submit at once to Anthropic)

I am pre-processing XML files to submit them as a string of the kind:

```
<original_post>...</original_post>\n\n
<comment idx=1 user=name1>...</comment>\n
<comment idx=2 user=name2>...</comment>\n
...
<comment idx=5 user=name3>...</comment>\n
```

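This hides unnecessary noise (e.g. delta information) from the LLM so that it does not interfere with the results, and it improves efficiency because there are fewer tokens to process. Note that the user name is not hidden: it is kept as the `user` attribute shown in the format above.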

I will be requesting evaluation for the first 5 comments in the thread. That is because 5 is the lowest number of comments in the sample. Going up would force N/A values in the resulting dataset, and it would create more complications.

In [None]:
from anthropic.types.message_create_params import MessageCreateParamsNonStreaming
from anthropic.types.messages.batch_create_params import Request

In [None]:
# System prompt for the summarisation stage: asks for a turn-by-turn trajectory
# summary of at most six sentences that describes conversational dynamics and
# leaves out the topics and claims of the thread.
instructions = """
You are a moderator with expertise in linguistics. You will receive a conversation thread - a parsed XML file with the following tags:
* <filename>: ID of the conversation
* <original_post>: Original post content
* <comment>: Chronologically ordered first five comments

## Task
Write a short summary capturing the trajectory of an online conversation, turn by turn.

## Requirements
- Do not include specific topics, claims, or arguments from the conversation
- Include no more than 1 sentence describing the impact of each turn
- The overall number of sentences in your output should not exceed 6
- Focus on conversational dynamics, not content

## Style to Avoid
Example of what NOT to do:
"Speaker1, who is Asian, defended Asians and pointed out that a study found that whites, Hispanics, and blacks were accepted into universities in that order, with Asians being accepted the least. Speaker2 acknowledged that Asians have high household income, but argued that this could be a plausible explanation for the study's findings. Speaker1 disagreed and stated that the study did not take wealth into consideration."

This style mentions specific claims and topics, which are not needed.

## Style to Follow
Do include:
- Sentiment indicators (e.g., sarcasm, passive-aggressive, polite, frustration, attack, blame)
- Individual intentions (e.g., agreement, disagreement, persistent-agreement, persistent-disagreement, rebuttal, defense, concession, confusion, clarification, neutral, accusation)
- Conversational strategies (e.g., rhetorical questions, straw man fallacy, identifying fallacies, appealing to emotions)

Examples of correct style:

Example 1: "Both speakers have differing opinions and appeared defensive. Speaker1 attacks Speaker2 by diminishing the importance of his argument and Speaker2 blames Speaker1 for using profane words. Both speakers accuse each other of being overly judgmental of their personal qualities rather than arguments."

Example 2: "The two speakers refuted each other with back-and-forth accusations. Throughout the conversation, they kept harshly fault-finding with overly critical viewpoints, creating an intense and inefficient discussion."

Example 3: "Speaker1 attacks Speaker2 by questioning the relevance of his premise and Speaker2 blames Speaker1 for using profane words. Both speakers accuse each other of being overly judgmental of their personal qualities rather than arguments."

## Focus Areas
The trajectory summary should capture the moments where the tension of the conversation notably changes.

## Complete Example
Trajectory summary:
Four speakers express their different points of view subsequently, building off of each other's arguments. Speaker1 disagrees with a specific point from Speaker2's argument, triggering Speaker2 to contradict Speaker1 in response. Then, Speaker3 jumps into the conversation to support Speaker1's argument, which leads Speaker2 to adamantly defend their argument. Speaker2 then quotes a deleted comment, giving an extensive counterargument. The overall tone remains civil.

Now, provide the trajectory summary for the following conversation.
"""

In [None]:
def prepare_batch_line(file, original_post, comments, instructions, i):
    """Build one Message Batch request that asks for a thread's summary.

    Args:
        file: Thread identifier, sent inside a <filename> tag.
        original_post: The original post, either already wrapped in an
            <original_post ...> element (as process_thread() returns it) or
            as bare text.
        comments: The rendered <comment> elements, one per line.
        instructions: System prompt text; sent as a cacheable system block.
        i: Sequence number used to form the ``request-{i}`` custom id.

    Returns:
        The ``Request`` to include in the batch.
    """
    # Wrap only bare text: re-wrapping an already tagged post would nest
    # <original_post> inside <original_post>.
    if original_post.lstrip().startswith("<original_post"):
        post_block = original_post
    else:
        post_block = f"<original_post>{original_post}</original_post>"

    # The opening tag must match the closing tag and the name the prompt uses.
    conversation_text = f"<filename>{file}</filename>\n{post_block}\n\n{comments}"

    request = Request(
        custom_id=f"request-{i}",
        params=MessageCreateParamsNonStreaming(
            model="claude-opus-4-20250514",
            max_tokens=1024,
            system=[{
                "type": "text",
                "text": instructions,
                "cache_control": {"type": "ephemeral"}
            }],
            messages=[{
                "role": "user",
                "content": conversation_text
            }],
            temperature=0
        )
    )

    return request


In [None]:
def run_summary(parsed_content, instructions):
    """Request one summary synchronously and return its text.

    This is the non-batch counterpart of the batch requests; it uses the
    module-level ``client``.

    Args:
        parsed_content: The user message (the rendered thread).
        instructions: System prompt text; sent as a cacheable system block.

    Returns:
        The text of the reply with surrounding whitespace removed.
    """
    response = client.messages.create(
        model="claude-opus-4-20250514",
        max_tokens=2048,
        temperature=0,
        system=[{
            "type": "text",
            "text": instructions,
            "cache_control": {"type": "ephemeral"}
        }],
        messages=[{
            "role": "user",
            "content": parsed_content
        }],
    )

    # Join every text block, as the batch-result cells do, instead of assuming
    # that the first content block exists and is a text block.
    reply = "".join(
        block.text for block in response.content if block.type == "text"
    )
    return reply.strip()

In [None]:
# NOTE(review): `cwd` is not defined in the cells visible here; presumably it
# is set in another cell or session. Confirm before running.
input_dir = f"{cwd}/100_sample/"
# glob returns paths in a filesystem-dependent order; sorting keeps the
# request numbering reproducible from one run to the next.
xml_files = sorted(glob.glob(os.path.join(input_dir, "*.xml")))
xml_dict = {}
decision_dict = {}  # custom_id -> source file name, used to join results back
batch_lines = []

for i, file in enumerate(xml_files, 1):
    # Hold the file open only for the read itself.
    with open(file, 'r', encoding='utf-8') as f:
        xml_content = f.read()

    op, comms = process_thread(xml_content)
    fn = os.path.basename(file)

    req = prepare_batch_line(fn, op, comms, instructions, i)
    decision_dict[f'request-{i}'] = fn
    batch_lines.append(req)



In [None]:
print(len(batch_lines))

97


In [None]:
# Copy the prepared requests into the list handed to the batch API.
requests = list(batch_lines)

In [None]:
# Submit all prepared requests as one batch and show the returned batch object.
message_batch = client.messages.batches.create(requests=requests)
print(message_batch)

In [None]:
mbatch_id = message_batch.id

In [None]:
# Fetch the batch again by id and print it.
# NOTE(review): there is no polling loop here; presumably this cell is re-run
# by hand until the batch has finished. Confirm.
message_batch = client.messages.batches.retrieve(
    mbatch_id,
)

print(message_batch)

### processing batch output (JSONL -> df)

In [None]:
import json

records = []
failed_ids = []  # custom_ids of requests that did not succeed

for result in client.messages.batches.results(mbatch_id):
    custom_id = result.custom_id

    # Only a "succeeded" result carries a message; reading `.message` on any
    # other result type would raise AttributeError and abort the whole loop.
    if result.result.type != "succeeded":
        failed_ids.append(custom_id)
        continue

    text_blocks = result.result.message.content
    content_str = "".join(block.text for block in text_blocks if block.type == "text")

    # One flat row per request: its id and the concatenated reply text.
    flattened = {'custom_id': custom_id,
                 'response': content_str}

    records.append(flattened)

if failed_ids:
    print(f"{len(failed_ids)} request(s) did not succeed: {failed_ids}")


In [None]:
resp_df = pd.DataFrame(records)

In [None]:
print(resp_df.head())

   custom_id                                           response
0  request-1  Trajectory summary:\nThe original poster prese...
1  request-2  Trajectory summary:\nThe conversation begins w...
2  request-3  Trajectory summary:\nThe original poster prese...
3  request-4  Trajectory summary:\nThe original poster prese...
4  request-5  Trajectory summary:\nThe original poster prese...


In [None]:
decisions_files = pd.DataFrame(list(decision_dict.items()), columns=["custom_id", "filename"])

In [None]:
resp_merged = resp_df.merge(decisions_files, on="custom_id", how="left")

In [None]:
resp_merged

Unnamed: 0,custom_id,response,filename
0,request-1,Trajectory summary:\nThe original poster prese...,1062071645.0_1_delta_threads.xml
1,request-2,Trajectory summary:\nThe conversation begins w...,1075040167.0_1_delta_threads.xml
2,request-3,Trajectory summary:\nThe original poster prese...,115882088.0_1_delta_threads.xml
3,request-4,Trajectory summary:\nThe original poster prese...,113567594.0_1_delta_threads.xml
4,request-5,Trajectory summary:\nThe original poster prese...,1082495263.0_2_delta_threads.xml
...,...,...,...
92,request-93,Trajectory summary:\nThe original poster prese...,363480636.0_3_deltaless_thread.xml
93,request-94,Trajectory summary:\nThe original poster expre...,410977978.0_3_deltaless_thread.xml
94,request-95,Trajectory summary:\nThe original poster expre...,410977978.0_4_deltaless_thread.xml
95,request-96,Trajectory summary:\nThe original poster expre...,442068628.0_4_deltaless_thread.xml


In [None]:
resp_merged["response"] = resp_merged["response"].str.replace("Trajectory summary:\n", "", regex=False)

In [None]:
resp_merged

Unnamed: 0,custom_id,response,filename
0,request-1,The original poster presents a critical view b...,1062071645.0_1_delta_threads.xml
1,request-2,The conversation begins with Speaker1 presenti...,1075040167.0_1_delta_threads.xml
2,request-3,The original poster presents a defense of farm...,115882088.0_1_delta_threads.xml
3,request-4,The original poster presents a pessimistic vie...,113567594.0_1_delta_threads.xml
4,request-5,The original poster presents a concern about p...,1082495263.0_2_delta_threads.xml
...,...,...,...
92,request-93,The original poster presents a controversial p...,363480636.0_3_deltaless_thread.xml
93,request-94,The original poster expresses extreme frustrat...,410977978.0_3_deltaless_thread.xml
94,request-95,The original poster expresses extreme frustrat...,410977978.0_4_deltaless_thread.xml
95,request-96,The original poster expresses deep pessimism a...,442068628.0_4_deltaless_thread.xml


In [None]:
def count_sentences(text):
    """Return how many non-empty sentences `text` contains.

    A sentence ends at '.', '!' or '?' when that character is followed by
    whitespace or by the end of the string; fragments that are empty after
    stripping are not counted.
    """
    pieces = re.split(r'[.!?](?:\s|$)', text)
    return sum(1 for piece in pieces if piece.strip())

resp_merged["sentence_count"] = resp_merged["response"].apply(count_sentences)

In [None]:
resp_merged['outcome'] = resp_merged['filename'].apply(lambda x: 0 if 'deltaless' in str(x) else 1)

In [None]:
resp_merged.to_csv(f'{cwd}/97_claude_summaries.csv')

In [None]:
# NOTE(review): in the cells visible here `xml_dict` is created empty and never
# filled, so this frame would have no rows; presumably it was populated by an
# earlier per-file run. Confirm.
df = pd.DataFrame(list(xml_dict.items()), columns=["filename", "summaries"])

In [None]:
df["summaries"] = df["summaries"].str.replace("Trajectory summary:\n", "", regex=False)

In [None]:
def count_sentences(text):
    """Count the sentences in `text`.

    The text is cut after every '.', '!' or '?' that is followed by
    whitespace or ends the string; only fragments with visible content
    contribute to the count.
    """
    total = 0
    for fragment in re.split(r'[.!?](?:\s|$)', text):
        if fragment.strip():
            total += 1
    return total

df["sentence_count"] = df["summaries"].apply(count_sentences)

In [None]:
df['outcome'] = df['filename'].apply(lambda x: 0 if 'deltaless' in str(x) else 1)

In [None]:
df.head()

Unnamed: 0,filename,summaries,sentence_count,outcome
0,1062071645.0_1_delta_threads.xml,The original poster presents a critical argume...,5,1
1,1075040167.0_1_delta_threads.xml,The original poster makes a controversial clai...,5,1
2,115882088.0_1_delta_threads.xml,The original poster shares personal observatio...,5,1
3,113567594.0_1_delta_threads.xml,The original poster presents a pessimistic vie...,5,1
4,1082495263.0_2_delta_threads.xml,The original poster presents a concern about p...,4,1


### summaries > prediction (yes or no)

In [None]:
# Load the summaries to classify. The commented-out line reads the 97-row file
# instead of the 74-row one.
# NOTE(review): the 74-row file has a `summary_corrected` column, so it is
# presumably a manually corrected subset. Confirm.
#df_sampled = pd.read_csv(f'{cwd}/97_claude_summaries.csv')
df_sampled = pd.read_csv(f'{cwd}/74_claude_summaries.csv')

In [None]:
# Drop the first column of the loaded frame (presumably the index column that
# an earlier to_csv call wrote without index=False; confirm).
del df_sampled[df_sampled.columns[0]]

In [None]:
df_sampled

Unnamed: 0,filename,outcome,summary_corrected,corrected,sentence_count
0,1062071645.0_3_deltaless_thread.xml,0,The original poster shares a personal anecdote...,1,4
1,1075040167.0_2_deltaless_thread.xml,0,The original poster makes a controversial clai...,1,4
2,1075040167.0_5_deltaless_thread.xml,0,The original poster makes a controversial clai...,1,4
3,1082495263.0_2_delta_threads.xml,1,"The original poster presents a concern, which ...",1,5
4,1102614149.0_2_delta_threads.xml,1,The original poster presents an interpretive q...,1,4
...,...,...,...,...,...
69,410977978.0_3_deltaless_thread.xml,0,The original poster expresses extreme frustrat...,1,5
70,442068628.0_1_delta_threads.xml,1,The original poster expresses pessimism about ...,1,5
71,442068628.0_4_deltaless_thread.xml,0,The original poster expresses deep pessimism a...,1,5
72,444945722.0_1_delta_threads.xml,1,The original poster presents a controversial v...,1,5


In [None]:
# System prompt for the prediction stage: given a trajectory summary, answer
# 'Yes' or 'No' on whether moderator intervention was needed, then explain.
# NOTE: this rebinds `instructions`, replacing the summarisation prompt that
# was assigned to the same name earlier in the notebook.
instructions = """
You are a moderator with expertise in linguistics. \
You will receive a summary of an online discussion thread, \
which includes the original post and subsequent comments. \
The summary highlights key moments where the tone or tension \
in the conversation shifts.

## Task
Analyze the summary and decide whether the conversation would have \
required moderator intervention to prevent or repair derailment.

Provide your answer with a simple 'Yes' or 'No', followed by a brief explanation.
"""

In [None]:
def prepare_batch_line(summary, instructions, i):
    """Build one batch request that asks for a moderation verdict on a summary.

    Args:
        summary: Trajectory summary, sent as the user message.
        instructions: System prompt text; sent as a cacheable system block.
        i: Sequence number used to form the ``request-{i}`` custom id.

    Returns:
        The ``Request`` to include in the batch.
    """
    system_blocks = [{
        "type": "text",
        "text": instructions,
        "cache_control": {"type": "ephemeral"},
    }]
    user_turn = {"role": "user", "content": summary}

    params = MessageCreateParamsNonStreaming(
        model="claude-opus-4-20250514",
        max_tokens=1024,
        system=system_blocks,
        messages=[user_turn],
        temperature=0,
    )
    return Request(custom_id=f"request-{i}", params=params)

In [None]:
decision_dict = {}  # custom_id -> source file name, used to join results back
batch_lines = []

# Request ids are 1-based and follow the row order of df_sampled.
# NOTE(review): the original kept a commented-out alternative that read the
# 'response' column instead; presumably for the uncorrected summaries. Confirm.
rows = zip(df_sampled['filename'], df_sampled['summary_corrected'])

for i, (fn, summary) in enumerate(tqdm(rows, total=len(df_sampled)), start=1):
    req = prepare_batch_line(summary, instructions, i)
    decision_dict[f'request-{i}'] = fn
    batch_lines.append(req)



100%|██████████| 74/74 [00:00<00:00, 10433.59it/s]


In [None]:
# Copy the prepared requests into the list handed to the batch API.
requests = list(batch_lines)

In [None]:
message_batch = client.messages.batches.create(requests=requests)
print(message_batch)

In [None]:
mbatch_id = message_batch.id

In [None]:
decision_dict

{'request-1': '1062071645.0_3_deltaless_thread.xml',
 'request-2': '1075040167.0_2_deltaless_thread.xml',
 'request-3': '1075040167.0_5_deltaless_thread.xml',
 'request-4': '1082495263.0_2_delta_threads.xml',
 'request-5': '1102614149.0_2_delta_threads.xml',
 'request-6': '1102614149.0_4_deltaless_thread.xml',
 'request-7': '113567594.0_1_delta_threads.xml',
 'request-8': '113567594.0_4_deltaless_thread.xml',
 'request-9': '115882088.0_1_delta_threads.xml',
 'request-10': '115882088.0_2_deltaless_thread.xml',
 'request-11': '134214340.0_1_delta_threads.xml',
 'request-12': '134214340.0_11_deltaless_thread.xml',
 'request-13': '1378810771.0_1_delta_threads.xml',
 'request-14': '1437482501.0_1_delta_threads.xml',
 'request-15': '1437482501.0_91_deltaless_thread.xml',
 'request-16': '144404372.0_19_deltaless_thread.xml',
 'request-17': '144404372.0_3_delta_threads.xml',
 'request-18': '144404372.0_8_deltaless_thread.xml',
 'request-19': '154839924.0_3_delta_threads.xml',
 'request-20': '1

In [None]:
message_batch = client.messages.batches.retrieve(
    mbatch_id,
)

print(message_batch)

# processing batch output (JSONL -> df)

In [None]:
import json

records = []
failed_ids = []  # custom_ids of requests that did not succeed

for result in client.messages.batches.results(mbatch_id):
    custom_id = result.custom_id

    # Only a "succeeded" result carries a message; reading `.message` on any
    # other result type would raise AttributeError and abort the whole loop.
    if result.result.type != "succeeded":
        failed_ids.append(custom_id)
        continue

    text_blocks = result.result.message.content
    content_str = "".join(block.text for block in text_blocks if block.type == "text")

    flattened = {'custom_id': custom_id,
                 'response': content_str}

    records.append(flattened)

if failed_ids:
    print(f"{len(failed_ids)} request(s) did not succeed: {failed_ids}")


In [None]:
resp_df = pd.DataFrame(records)

In [None]:
print(resp_df.head())

   custom_id                                           response
0  request-1  Yes.\n\nThe conversation clearly deteriorated ...
1  request-2  No.\n\nWhile the conversation began with contr...
2  request-3  No.\n\nWhile the discussion involves controver...
3  request-4  No.\n\nThis appears to be a robust but civil d...
4  request-5  No.\n\nWhile the conversation shows increasing...


In [None]:
decisions_files = pd.DataFrame(list(decision_dict.items()), columns=["custom_id", "filename"])

In [None]:
resp_merged = resp_df.merge(decisions_files, on="custom_id", how="left")

In [None]:
# partition() always yields three columns (head, separator, tail). With
# str.split(..., expand=True) a batch in which no response contains a blank
# line produces a single column, and `split_cols[1]` then raises KeyError.
split_cols = resp_merged["response"].str.partition("\n\n")
# Verdict: the text before the first blank line, lower-cased, with surrounding
# dots and spaces removed ("Yes." -> "yes").
resp_merged["intervention"] = split_cols[0].str.lower().str.strip(". ")
# Explanation: the text after the first blank line; left missing when the
# response had no blank-line separator at all.
resp_merged["reason"] = split_cols[2].mask(split_cols[1] == "")

In [None]:
# predicted = 1 means "no intervention needed", 0 means anything else.
# Match "no" only as the leading word of the verdict: a plain substring test
# also fires on verdicts such as "unknown" or "yes, but not ...".
resp_merged['predicted'] = (
    resp_merged['intervention'].astype(str).str.match(r'no\b').astype(int)
)

In [None]:
resp_merged.head()

Unnamed: 0,custom_id,response,filename,intervention,reason,predicted
0,request-1,Yes.\n\nThe conversation clearly deteriorated ...,1062071645.0_3_deltaless_thread.xml,yes,The conversation clearly deteriorated into per...,0
1,request-2,No.\n\nWhile the conversation began with contr...,1075040167.0_2_deltaless_thread.xml,no,While the conversation began with controversy ...,1
2,request-3,No.\n\nWhile the discussion involves controver...,1075040167.0_5_deltaless_thread.xml,no,While the discussion involves controversial cl...,1
3,request-4,No.\n\nThis appears to be a robust but civil d...,1082495263.0_2_delta_threads.xml,no,This appears to be a robust but civil debate w...,1
4,request-5,No.\n\nWhile the conversation shows increasing...,1102614149.0_2_delta_threads.xml,no,While the conversation shows increasing tensio...,1


In [None]:
df_full = df_sampled.merge(resp_merged, on="filename", how="left")

In [None]:
df_full.to_csv(f'{cwd}/74_summaries_pred.csv', index=False)

In [None]:
correct_matches = (df_full["outcome"] == df_full["predicted"]).sum()
print(f"Number of correct predictions: {correct_matches}")

Number of correct predictions: 41


In [None]:
df_full

Unnamed: 0,custom_id_x,response_x,filename,sentence_count,outcome,custom_id_y,response_y,intervention,reason,predicted
0,request-1,The original poster presents a critical view b...,1062071645.0_1_delta_threads.xml,4,1,request-1,No.\n\nWhile there is clear disagreement and s...,no,While there is clear disagreement and some per...,1
1,request-2,The conversation begins with Speaker1 presenti...,1075040167.0_1_delta_threads.xml,5,1,request-2,No.\n\nWhile the conversation shows increasing...,no,While the conversation shows increasing polari...,1
2,request-3,The original poster presents a defense of farm...,115882088.0_1_delta_threads.xml,5,1,request-3,No.\n\nWhile the conversation contains disagre...,no,While the conversation contains disagreement a...,1
3,request-4,The original poster presents a pessimistic vie...,113567594.0_1_delta_threads.xml,4,1,request-4,No.\n\nThis conversation shows healthy intelle...,no,This conversation shows healthy intellectual d...,1
4,request-5,The original poster presents a concern about p...,1082495263.0_2_delta_threads.xml,5,1,request-5,No.\n\nWhile the conversation shows persistent...,no,While the conversation shows persistent disagr...,1
...,...,...,...,...,...,...,...,...,...,...
92,request-93,The original poster presents a controversial p...,363480636.0_3_deltaless_thread.xml,5,0,request-93,Yes.\n\nThis conversation would have required ...,yes,This conversation would have required moderato...,0
93,request-94,The original poster expresses extreme frustrat...,410977978.0_3_deltaless_thread.xml,5,0,request-94,Yes.\n\nThe conversation required moderator in...,yes,The conversation required moderator interventi...,0
94,request-95,The original poster expresses extreme frustrat...,410977978.0_4_deltaless_thread.xml,5,0,request-95,No.\n\nWhile the conversation contains tension...,no,While the conversation contains tension and so...,1
95,request-96,The original poster expresses deep pessimism a...,442068628.0_4_deltaless_thread.xml,5,0,request-96,No.\n\nThis conversation demonstrates healthy ...,no,This conversation demonstrates healthy discour...,1


In [None]:
# NOTE(review): this writes `df` (the frame built from `xml_dict`), not the
# merged `df_full`, and the path is relative rather than under `cwd`.
# Presumably a leftover from an earlier run; confirm which frame is intended.
df.to_csv('claude_batch_output_sum.csv', index=False)