## Generate Metacognitive Labels using the Della-Inference API

In [1]:
import requests
import json
import pandas as pd

In [2]:
# Chat-completions endpoint that every request in this notebook is posted to.
url = "http://localhost:12257/v1/chat/completions"

# HTTP headers sent with each request.
# NOTE(review): the Authorization value carries no scheme prefix such as
# "Bearer" -- confirm this is what the server expects.
headers = {"Content-Type": "application/json", "Authorization": "token-abc123"}

# Model identifier placed in the "model" field of each request payload.
model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct"

In [3]:
def get_skill_labels_prompt(opening_statement, question):
    """Build the chat messages that ask the model to tag one question.

    Args:
        opening_statement: Opening statement text interpolated into the user
            message.
        question: Question text interpolated into the user message.

    Returns:
        A two-element list of ``{"role", "content"}`` dicts: the fixed system
        instructions (with one worked example) followed by the user message
        carrying the task-specific inputs.
    """
    # Fixed instructions; a plain (non-f) string, so the JSON braces are literal.
    system_content = """You are a legal expert tasked with analyzing a Supreme Court oral argument question in the context of a provided opening statement. Your goal is to label the question with the minimum number of descriptive characteristics (tags) necessary to fully capture its purpose or nature. Each tag should be concise, written in lower case, and formatted like a Python dictionary key (e.g., questioning_statutory_interpretation).

        ### Instructions:
        1. Identify Tags: Assign as many tags as necessary but only the minimum required to describe the essence of the question.
        2. Provide Reasons: For each tag, explain why it applies to the question.
        3. Format Output: Present your response in a structured JSON format.

        ### Output format:
        Your response must follow this JSON format:
        {
        "tags": [
            {
            "name": "<name_of_characteristic>",
            "reason": "<reason_for_the_characteristic>"
            },
            ...
        ]
        }

        ### Example:
        Opening Statement: "<speaker>John Doe</speaker> <text>Mr. Chief Justice, and may it please the Court: The key question in this case is whether an employer's internal policy can override an employee's federally protected rights under the Whistleblower Protection Act. Our position is that the Act’s protections are paramount, ensuring employees can report wrongdoing without fear of retaliation, regardless of internal company rules. We ask the Court to affirm this vital safeguard.</text>"
        Question: "Wouldn’t your interpretation effectively prevent companies from enforcing any internal policies related to confidentiality?"

        ### Response:
        {
        "tags": [
            {
            "name": "testing_limits",
            "reason": "The question explores the boundaries of the advocate’s argument by asking about its impact on internal company policies."
            },
            {
            "name": "evaluating_policy_consequences",
            "reason": "It examines the broader implications of the interpretation for company operations and confidentiality rules."
            }
        ]
        }

    """

    # Per-question part of the prompt; mirrors the layout of the example above.
    user_content = f"""### Your Task:
        Opening Statement: {opening_statement}
        Question: {question}

        ### Response:
    """

    return [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_content},
    ]

In [5]:
def get_model_response(messages, timeout=None):
    """Send a chat-completion request for ``messages`` and return the response.

    Args:
        messages: List of chat message dicts (``role``/``content``), e.g. the
            list returned by ``get_skill_labels_prompt``.
        timeout: Optional timeout forwarded to ``requests.post`` (seconds, or
            a ``(connect, read)`` tuple). The default ``None`` waits
            indefinitely, which is what this function did before the
            parameter existed; pass a value to keep one stalled request from
            blocking a long batch run.

    Returns:
        The object returned by ``requests.post``. The HTTP status is not
        checked here.
    """
    payload = {
        "model": model_name,
        "messages": messages,
    }

    # The body is serialized by hand; the Content-Type header is supplied by
    # the module-level ``headers`` dict.
    response = requests.post(
        url, data=json.dumps(payload), headers=headers, timeout=timeout
    )
    return response

def parse_response(response):
    """Extract the first choice's message text from a chat-completion response.

    Args:
        response: Object whose ``content`` attribute holds the UTF-8 encoded
            JSON body of the response.

    Returns:
        The value at ``choices[0].message.content`` in the decoded body. It is
        returned as-is; no attempt is made here to parse it as JSON.
    """
    body = json.loads(response.content.decode('utf-8'))
    first_choice = body['choices'][0]
    return first_choice['message']['content']

In [6]:
def generate_skill_labels(opening_statement, question):
    """Ask the model to tag one question and return its raw reply text.

    Builds the prompt with ``get_skill_labels_prompt``, sends it with
    ``get_model_response``, and returns whatever ``parse_response`` extracts
    from the reply.
    """
    prompt_messages = get_skill_labels_prompt(opening_statement, question)
    return parse_response(get_model_response(prompt_messages))

#### Get labels for 2024 'coherent' questions

In [15]:
# Load the 2024 questions together with their coherence labels, then preview
# the first rows.
input_fp = '../datasets/2024_all_questions_coherence_labeled_Meta-Llama-3.1-70B-Instruct.csv'
df = pd.read_csv(input_fp)
df.head()

Unnamed: 0,transcript_id,question_addressee,justice,question_text,opening_statement,full_text,label
0,2024.23-621-t01,petitioner,Clarence Thomas,You --can a consent decree or a default judgm...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",incoherent
1,2024.23-621-t01,petitioner,Clarence Thomas,But I thought your argument hinged on a court...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent
2,2024.23-621-t01,petitioner,"John G. Roberts, Jr.",What do you do with the formulation by your f...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",incoherent
3,2024.23-621-t01,petitioner,Elena Kagan,"Well, it's -- it's true that it's only a lik...",<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent
4,2024.23-621-t01,petitioner,Ketanji Brown Jackson,But it's not that determination that's making...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent


In [16]:
# Keep only rows labeled 'coherent'; copy so that adding a column later does
# not act on a view of `df`.
df_coherent = df[df['label'] == 'coherent'].copy()

In [17]:
# GENERATE FOR ALL
df_coherent['skill_labels_raw'] = df_coherent.apply(
    lambda row: generate_skill_labels(row['opening_statement'], row['question_text']), axis=1
)
df_coherent.head()

Unnamed: 0,transcript_id,question_addressee,justice,question_text,opening_statement,full_text,label,skill_labels_raw
1,2024.23-621-t01,petitioner,Clarence Thomas,But I thought your argument hinged on a court...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent,"{\n ""tags"": [\n {\n ""name"": ""clarifyi..."
3,2024.23-621-t01,petitioner,Elena Kagan,"Well, it's -- it's true that it's only a lik...",<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent,"{\n ""tags"": [\n {\n ""name"": ""weighing..."
4,2024.23-621-t01,petitioner,Ketanji Brown Jackson,But it's not that determination that's making...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent,"{\n ""tags"": [\n {\n ""name..."
5,2024.23-621-t01,petitioner,Ketanji Brown Jackson,When you think about the difference between m...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent,"{\n ""tags"": [\n {\n ""name..."
7,2024.23-621-t01,petitioner,Ketanji Brown Jackson,But didn't Sole open -- leave open that --th...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent,"{\n ""tags"": [\n {\n ""name..."


Save File:

In [18]:
# Write the coherent questions and their raw labels to CSV, without the index.
out_fp = '../datasets/2024_coherent_metacog_labels_multi.csv'
df_coherent.to_csv(out_fp, index=False)

### Get labels for 2024 'incoherent' questions

In [19]:
# Re-read the labeled questions and keep only the rows labeled 'incoherent';
# copy so that adding a column later does not act on a view of `df`.
input_fp = '../datasets/2024_all_questions_coherence_labeled_Meta-Llama-3.1-70B-Instruct.csv'
df = pd.read_csv(input_fp)
df_incoherent = df[df['label'] == 'incoherent'].copy()

In [20]:
# GENERATE FOR ALL
df_incoherent['skill_labels_raw'] = df_incoherent.apply(
    lambda row: generate_skill_labels(row['opening_statement'], row['question_text']), axis=1
)
df_incoherent.head()

Unnamed: 0,transcript_id,question_addressee,justice,question_text,opening_statement,full_text,label,skill_labels_raw
0,2024.23-621-t01,petitioner,Clarence Thomas,You --can a consent decree or a default judgm...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",incoherent,"{\n ""tags"": [\n {\n ""name..."
2,2024.23-621-t01,petitioner,"John G. Roberts, Jr.",What do you do with the formulation by your f...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",incoherent,"{\n ""tags"": [\n {\n ""name"": ""evaluati..."
6,2024.23-621-t01,petitioner,Ketanji Brown Jackson,What about the Chief Justice's example? In t...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",incoherent,"{\n""tags"": [\n {\n ""name"": ""referencing_..."
8,2024.23-621-t01,petitioner,Elena Kagan,"And -- and -- and couldn't a state do that, y...",<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",incoherent,"{\n ""tags"": [\n {\n ""name..."
11,2024.23-621-t01,petitioner,"Samuel A. Alito, Jr.",If there is very strong evidence that the gov...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",incoherent,"{\n ""tags"": [\n {\n ""name..."


Save file:

In [21]:
# Write the incoherent questions and their raw labels to CSV, without the
# index. The frame saved here must be df_incoherent; writing df_coherent would
# put the coherent rows into the incoherent output file.
out_fp = '../datasets/2024_incoherent_metacog_labels_multi.csv'
df_incoherent.to_csv(out_fp, index=False)

### Test on sample

In [10]:
# Test on a subsample: label only the first two coherent rows, working on a
# copy so df_coherent itself is left unchanged.
df_new = df_coherent.head(2).copy()
df_new['skill_labels_raw'] = df_new.apply(
    lambda row: generate_skill_labels(row['opening_statement'], row['question_text']), axis=1
)
df_new

Unnamed: 0,transcript_id,question_addressee,justice,question_text,opening_statement,full_text,label,skill_labels_raw
1,2024.23-621-t01,petitioner,Clarence Thomas,But I thought your argument hinged on a court...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent,"{\n ""tags"": [\n {\n ""name..."
3,2024.23-621-t01,petitioner,Elena Kagan,"Well, it's -- it's true that it's only a lik...",<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent,"{\n ""tags"": [\n {\n ""name..."


In [14]:
# Decode the raw label string stored under index label 3 into Python objects
# to check that the model reply is valid JSON.
sample_resp = json.loads(df_new.loc[3, 'skill_labels_raw'])
sample_resp

{'tags': [{'name': 'challenging_interpretation',
   'reason': "The question disputes the advocate's interpretation of a preliminary injunction's impact on prevailing party status."},
  {'name': 'arguing_by_analogy',
   'reason': "It draws an analogy to a consent decree case, suggesting that the legislature's action is similar to a unilateral settlement."},
  {'name': 'evaluating_outcome_evidence',
   'reason': 'It emphasizes the fact that the parties obtained everything they wanted in the interim period, implying that this outcome should influence the determination of prevailing party status.'},
  {'name': 'questioning_policy_implications',
   'reason': "It raises questions about the practical implications of the advocate's proposed bright-line rule, specifically with regards to fee allocation."},
  {'name': 'weighing_circumstantial_evidence',
   'reason': 'It considers various factors (likelihood of success, interim relief, and legislative action) to argue that they collectively suppo