## Generate Coherence Judgments using the Della-Inference API

In [1]:
import requests
import json
import re
import pandas as pd
import os

In [2]:
# Chat-completions endpoint of the inference server (served on localhost).
url = "http://localhost:12257/v1/chat/completions"

# Headers attached to every request.
# NOTE(review): OpenAI-compatible servers commonly expect "Bearer <key>" in
# the Authorization header -- confirm what this deployment requires.
headers = {
    "Content-Type": "application/json",
    "Authorization": "token-abc123",
}

# Model identifier sent in the request payload; its last path component is
# also used below to name output directories and files.
model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct"

In [3]:
def get_coherence_prompt(opening_statement, question):
    """Build the chat messages that ask the model for a coherence label.

    Args:
        opening_statement: Opening-statement text, inserted under the
            "### Opening Statement:" heading of the user message.
        question: Question text, inserted under the "### Question:" heading.

    Returns:
        A two-element list of ``{"role": ..., "content": ...}`` dicts: the
        system instructions first, then the user message holding both texts.
    """
    # NOTE: the prompt wording (including the "Repond with a only" typo) and
    # the four-space indentation of its continuation lines are reproduced
    # verbatim; changing them would change what the model is asked.
    instructions = (
        "Below is an opening statement from a Supreme Court case and a "
        "question asked during the argument. Your task is to determine "
        "whether the question logically follows from the opening statement "
        "alone.\n"
        "\n"
        "    Repond with a only a single word. "
        "If the question makes sense based solely on the content of the "
        "opening statement, "
        'label it as "coherent." '
        "If additional context from other parts of the argument is needed "
        "for the question to make sense, "
        'label it as "incoherent."\n'
        "    "
    )
    request = (
        "\n"
        "    ### Opening Statement:\n"
        f"    {opening_statement}\n"
        "    \n"
        "    ### Question:\n"
        f"    {question}\n"
        "\n"
        "    ### Label:\n"
        "    "
    )
    return [
        {"role": "system", "content": instructions},
        {"role": "user", "content": request},
    ]

def get_model_response(messages, timeout=300):
    """POST a chat-completion request for ``messages`` and return the response.

    Args:
        messages: List of chat message dicts to send as the ``messages``
            field of the request payload.
        timeout: Seconds ``requests`` waits for the server before raising a
            timeout exception. Pass ``None`` to wait indefinitely, which is
            what this function did before the parameter existed.

    Returns:
        The ``requests.Response`` returned by the server. The status code and
        body are not inspected here.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    payload = {
        "model": model_name,
        "messages": messages,
    }

    # This function is called once per DataFrame row; without a timeout a
    # single stalled request would block the whole labelling run forever.
    response = requests.post(
        url,
        data=json.dumps(payload),
        headers=headers,
        timeout=timeout,
    )
    return response

def parse_response(response):
    """Extract the model's reply text from a chat-completion response.

    Args:
        response: Object whose ``content`` attribute holds the UTF-8 encoded
            JSON body of the completion.

    Returns:
        The ``content`` string of the first choice's message, exactly as the
        model produced it (no case or punctuation normalisation).
    """
    body = json.loads(response.content.decode('utf-8'))
    first_choice = body['choices'][0]
    return first_choice['message']['content']

def get_coherence_judgments(opening_statement, question):
    """Ask the model whether ``question`` follows from ``opening_statement``.

    Builds the prompt messages, sends them to the model, and parses the reply.

    Args:
        opening_statement: Opening-statement text to judge against.
        question: Question text to be judged.

    Returns:
        The model's reply text as returned by ``parse_response``; it is not
        normalised here, so casing and trailing punctuation may vary.
    """
    return parse_response(
        get_model_response(get_coherence_prompt(opening_statement, question))
    )



### Generate for historical data

In [18]:
### CHANGE THIS TO GENERATE FOR DIFF CHUNKS

### NEXT TODO IS 2018, chunk 2
year = 'TO_FILL'
chunk = 'TO_FILL'

# Input slice: <input_dir>/<year>_<chunk>_questions_from_current_justices.csv
input_dir = '../datasets/historical_questions/' + f'{year}' + '/slices'
input_filename = f'{year}_{chunk}' + '_questions_from_current_justices'
input_fp = input_dir + '/' + input_filename + '.csv'

# Output goes to a per-model subdirectory named after the last component of
# model_name; the directory is created if it does not exist yet.
out_dir = '../datasets/coherence/'
model = model_name.rsplit('/', 1)[-1]
out_subdir = os.path.join(out_dir, model)
os.makedirs(out_subdir, exist_ok=True)
out_fp = out_subdir + '/' + input_filename + '.csv'

In [19]:
# Load the question slice selected above and preview its first rows; later
# cells read its `opening_statement` and `question_text` columns.
df = pd.read_csv(input_fp)
df.head()

Unnamed: 0.1,Unnamed: 0,transcript_id,question_addressee,justice,question_text,opening_statement,year
0,34267,2022.21-1449-t01,petitioner,Amy Coney Barrett,"You know, and so the government points out tha...",<speaker>Noel J. Francisco</speaker><text>Mr. ...,2022
1,34268,2022.21-1449-t01,petitioner,Amy Coney Barrett,Doesn't Garmon do something a little bit diffe...,<speaker>Noel J. Francisco</speaker><text>Mr. ...,2022
2,34269,2022.21-1449-t01,petitioner,Elena Kagan,Why -- why is it that there is such a sharp di...,<speaker>Noel J. Francisco</speaker><text>Mr. ...,2022
3,34270,2022.21-1449-t01,petitioner,Ketanji Brown Jackson,-- there was not a Garmon issue in Bill Johnso...,<speaker>Noel J. Francisco</speaker><text>Mr. ...,2022
4,34271,2022.21-1449-t01,petitioner,Ketanji Brown Jackson,Could a state court decide -- if this was sent...,<speaker>Noel J. Francisco</speaker><text>Mr. ...,2022


#### Run on slice and save

In [20]:
# One model call per row: hand each (opening_statement, question_text) pair
# to the labeller and store the raw reply in a new `label` column.
df['label'] = df[['opening_statement', 'question_text']].apply(
    lambda pair: get_coherence_judgments(*pair), axis=1
)
df.head()

Unnamed: 0.1,Unnamed: 0,transcript_id,question_addressee,justice,question_text,opening_statement,year,label
0,34267,2022.21-1449-t01,petitioner,Amy Coney Barrett,"You know, and so the government points out tha...",<speaker>Noel J. Francisco</speaker><text>Mr. ...,2022,coherent
1,34268,2022.21-1449-t01,petitioner,Amy Coney Barrett,Doesn't Garmon do something a little bit diffe...,<speaker>Noel J. Francisco</speaker><text>Mr. ...,2022,Coherent
2,34269,2022.21-1449-t01,petitioner,Elena Kagan,Why -- why is it that there is such a sharp di...,<speaker>Noel J. Francisco</speaker><text>Mr. ...,2022,coherent
3,34270,2022.21-1449-t01,petitioner,Ketanji Brown Jackson,-- there was not a Garmon issue in Bill Johnso...,<speaker>Noel J. Francisco</speaker><text>Mr. ...,2022,coherent
4,34271,2022.21-1449-t01,petitioner,Ketanji Brown Jackson,Could a state court decide -- if this was sent...,<speaker>Noel J. Francisco</speaker><text>Mr. ...,2022,coherent


In [21]:
# Persist the labelled slice; index=False keeps the row index out of the CSV.
df.to_csv(out_fp, index=False)

#### Loop historical data

In [26]:
year = 2018

# Neither the input directory nor the output directory depends on the chunk
# index, so they are resolved (and the output directory created) up front.
input_dir = f'../datasets/historical_questions/{year}/slices'
out_dir = '../datasets/coherence/'
model = model_name.split('/')[-1]
out_subdir = os.path.join(out_dir, model)
os.makedirs(out_subdir, exist_ok=True)

for i in range(2):
    chunk = i
    input_filename = f'{year}_{chunk}_questions_from_current_justices'
    input_fp = f'{input_dir}/{input_filename}.csv'
    out_fp = f'{out_subdir}/{input_filename}.csv'

    df = pd.read_csv(input_fp)

    print(f"Running on chunk {i}")
    # One model call per row; the raw reply is stored in `label`.
    df['label'] = df[['opening_statement', 'question_text']].apply(
        lambda pair: get_coherence_judgments(*pair), axis=1
    )
    df.to_csv(out_fp, index=False)
    print(f"Saved results for chunk {i} to {out_fp}")

Running on chunk 0
Saved results for chunk 0 to ../datasets/coherence/Meta-Llama-3.1-70B-Instruct/2018_0_questions_from_current_justices.csv
Running on chunk 1
Saved results for chunk 1 to ../datasets/coherence/Meta-Llama-3.1-70B-Instruct/2018_1_questions_from_current_justices.csv


#### Sanity test with subsample

In [None]:
# Label only the first two rows of the current frame as a quick end-to-end
# check; the copy keeps `df` itself untouched.
df_new = df.iloc[:2].copy()
df_new['label'] = df_new[['opening_statement', 'question_text']].apply(
    lambda pair: get_coherence_judgments(*pair), axis=1
)
df_new

In [None]:
# `out_fp` is the path the full labelled results were written to by the
# preceding cells, so saving the two-row sample there would overwrite them.
# Write the sample to a sibling "<name>_sanity<ext>" file instead.
sanity_root, sanity_ext = os.path.splitext(out_fp)
sanity_fp = f'{sanity_root}_sanity{sanity_ext}'
df_new.to_csv(sanity_fp, index=False)

### Generate for 2024

In [None]:
# Load the 2024 questions file and preview its first rows. This rebinds the
# notebook-level `input_fp` and `df` used by the cells that follow.
input_fp = '../datasets/2024_all_questions_full_text_merged.csv'
df = pd.read_csv(input_fp)
df.head()

Unnamed: 0,transcript_id,question_addressee,justice,question_text,opening_statement,full_text
0,2024.23-621-t01,petitioner,Clarence Thomas,You --can a consent decree or a default judgm...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ..."
1,2024.23-621-t01,petitioner,Clarence Thomas,But I thought your argument hinged on a court...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ..."
2,2024.23-621-t01,petitioner,"John G. Roberts, Jr.",What do you do with the formulation by your f...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ..."
3,2024.23-621-t01,petitioner,Elena Kagan,"Well, it's -- it's true that it's only a lik...",<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ..."
4,2024.23-621-t01,petitioner,Ketanji Brown Jackson,But it's not that determination that's making...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ..."


Generate labels for full dataset:

In [30]:
# One model call per row: hand each (opening_statement, question_text) pair
# to the labeller and store the raw reply in a new `label` column.
df['label'] = df[['opening_statement', 'question_text']].apply(
    lambda pair: get_coherence_judgments(*pair), axis=1
)
df.head()

Unnamed: 0,transcript_id,question_addressee,justice,question_text,opening_statement,full_text,label
0,2024.23-621-t01,petitioner,Clarence Thomas,You --can a consent decree or a default judgm...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",incoherent
1,2024.23-621-t01,petitioner,Clarence Thomas,But I thought your argument hinged on a court...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent
2,2024.23-621-t01,petitioner,"John G. Roberts, Jr.",What do you do with the formulation by your f...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",incoherent
3,2024.23-621-t01,petitioner,Elena Kagan,"Well, it's -- it's true that it's only a lik...",<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent
4,2024.23-621-t01,petitioner,Ketanji Brown Jackson,But it's not that determination that's making...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent


In [31]:
# Tally the raw model replies to spot casing/punctuation variants of the two
# labels before they are normalised.
df['label'].value_counts()

coherent      486
incoherent    321
Coherent       49
Incoherent     23
coherent.       2
Coherent.       2
Name: label, dtype: int64

In [35]:
def clean_label(label):
    """Normalise a raw model reply to a bare lowercase label.

    Lowercases the text and strips any run of whitespace, periods and quote
    characters from both ends, so variants such as ``Coherent.``, a reply
    with a trailing newline, or a quoted ``"coherent."`` all collapse to
    ``coherent``. Characters in the middle of the string are left alone.

    Args:
        label: Raw reply string from the model.

    Returns:
        The normalised label string.
    """
    # Stripping periods alone misses replies padded with whitespace or
    # wrapped in the quotes the prompt itself uses around the label names.
    return label.lower().strip(" \t\r\n.\"'")

In [36]:
# Normalise every raw reply in place, then re-tally to confirm that only the
# expected label values remain.
df['label'] = df['label'].map(clean_label)
df['label'].value_counts()

coherent      539
incoherent    344
Name: label, dtype: int64

Save dataset as csv

In [None]:
# Name the output after the last path component of the model identifier and
# write the labelled frame without its row index.
model_id = model_name.split('/')[-1]
output_fp = f'../datasets/2024_all_questions_coherence_labeled_{model_id}.csv'
df.to_csv(output_fp, index=False)

### Sanity test with `transcript_id` = `2024.23-217-t01`

In [23]:
# Keep only the questions from transcript 2024.23-217-t01 that were addressed
# to the petitioner; copy so adding columns later does not touch `df`.
is_target_transcript = df['transcript_id'] == '2024.23-217-t01'
is_to_petitioner = df['question_addressee'] == 'petitioner'
df_217 = df[is_target_transcript & is_to_petitioner].copy()
df_217

Unnamed: 0,transcript_id,question_addressee,justice,question_text,opening_statement,full_text
354,2024.23-217-t01,petitioner,Clarence Thomas,"Other than in the context of actual malice, c...",<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ..."
355,2024.23-217-t01,petitioner,Clarence Thomas,How would you respond -- what do you have to ...,<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ..."
356,2024.23-217-t01,petitioner,"John G. Roberts, Jr.",How are we supposed to -- you make the argum...,<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ..."
357,2024.23-217-t01,petitioner,Brett M. Kavanaugh,"Well, how do we apply the particularly import...",<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ..."
358,2024.23-217-t01,petitioner,Brett M. Kavanaugh,"Are you saying -- and, relatedly, are you sa...",<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ..."
359,2024.23-217-t01,petitioner,Sonia Sotomayor,Can I just ask a practical question? You ask...,<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ..."
360,2024.23-217-t01,petitioner,Sonia Sotomayor,All right? So I don't think we should get in...,<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ..."
361,2024.23-217-t01,petitioner,Sonia Sotomayor,"On the last issue you raised, which was the c...",<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ..."
362,2024.23-217-t01,petitioner,Sonia Sotomayor,You're using it in the sense of what they per...,<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ..."
363,2024.23-217-t01,petitioner,Elena Kagan,The court of appeals here applied its own cir...,<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ..."


In [24]:
# One model call per row: hand each (opening_statement, question_text) pair
# to the labeller and store the raw reply in a new `label` column.
df_217['label'] = df_217[['opening_statement', 'question_text']].apply(
    lambda pair: get_coherence_judgments(*pair), axis=1
)
df_217

Unnamed: 0,transcript_id,question_addressee,justice,question_text,opening_statement,full_text,label
354,2024.23-217-t01,petitioner,Clarence Thomas,"Other than in the context of actual malice, c...",<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent
355,2024.23-217-t01,petitioner,Clarence Thomas,How would you respond -- what do you have to ...,<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",incoherent
356,2024.23-217-t01,petitioner,"John G. Roberts, Jr.",How are we supposed to -- you make the argum...,<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent
357,2024.23-217-t01,petitioner,Brett M. Kavanaugh,"Well, how do we apply the particularly import...",<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent
358,2024.23-217-t01,petitioner,Brett M. Kavanaugh,"Are you saying -- and, relatedly, are you sa...",<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent
359,2024.23-217-t01,petitioner,Sonia Sotomayor,Can I just ask a practical question? You ask...,<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent
360,2024.23-217-t01,petitioner,Sonia Sotomayor,All right? So I don't think we should get in...,<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent
361,2024.23-217-t01,petitioner,Sonia Sotomayor,"On the last issue you raised, which was the c...",<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",incoherent
362,2024.23-217-t01,petitioner,Sonia Sotomayor,You're using it in the sense of what they per...,<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",incoherent
363,2024.23-217-t01,petitioner,Elena Kagan,The court of appeals here applied its own cir...,<speaker>Lisa S. Blatt</speaker><text> Mr. Chi...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",incoherent


In [19]:
# ### Figure out parsing logic
# resp = df_217['label'].iloc[2]
# decoded = resp.content.decode('utf-8')
# response_data = json.loads(decoded)
# content = response_data['choices'][0]['message']['content']
# content

'coherent'

In [26]:
# Raw Llama replies for the sanity-check transcript, in DataFrame row order.
llama_labels = df_217['label']

In [27]:
# labels generated by chat GPT-4o
gpt_labels = [
    "coherent",
    "incoherent",
    "coherent",
    "coherent",
    "coherent",
    "incoherent",
    "incoherent",
    "coherent",
    "incoherent",
    "incoherent",
    "coherent",
    "coherent",
    "coherent",
    "incoherent",
    "coherent",
    "incoherent",
    "incoherent",
    "incoherent",
    "incoherent",
    "coherent",
    "incoherent",
    "incoherent",
    "incoherent",
    "incoherent",
    "coherent",
    "incoherent",
    "incoherent",
    "incoherent"
]

In [28]:
# zip() stops at the shorter input, so a length mismatch would otherwise drop
# the unmatched labels without any indication that the lists do not line up.
if len(llama_labels) != len(gpt_labels):
    print(
        f"Warning: comparing {len(llama_labels)} Llama labels with "
        f"{len(gpt_labels)} GPT labels; unmatched trailing labels are skipped"
    )

# Print the two labels for each question side by side.
for l, g in zip(llama_labels, gpt_labels):
    print(f"Llama: {l}, GPT: {g}")

Llama: coherent, GPT: coherent
Llama: incoherent, GPT: incoherent
Llama: coherent, GPT: coherent
Llama: coherent, GPT: coherent
Llama: coherent, GPT: coherent
Llama: coherent, GPT: incoherent
Llama: coherent, GPT: incoherent
Llama: incoherent, GPT: coherent
Llama: incoherent, GPT: incoherent
Llama: incoherent, GPT: incoherent
Llama: coherent, GPT: coherent
Llama: coherent, GPT: coherent
Llama: coherent, GPT: coherent
Llama: incoherent, GPT: incoherent
Llama: coherent, GPT: coherent
Llama: coherent, GPT: incoherent
Llama: incoherent, GPT: incoherent
Llama: coherent, GPT: incoherent
Llama: Coherent, GPT: incoherent
Llama: coherent, GPT: coherent
Llama: Coherent, GPT: incoherent
Llama: coherent, GPT: incoherent
Llama: incoherent, GPT: incoherent
Llama: incoherent, GPT: incoherent
Llama: coherent, GPT: coherent
Llama: incoherent, GPT: incoherent
Llama: incoherent, GPT: incoherent


**Note**: From the comparison above, Llama appears to label more questions as coherent than GPT-4o does.