## Generate valence classification tags using the della inference API

In [1]:
import requests
import json
import pandas as pd

In [2]:
# Chat-completions endpoint of the inference server, reached on localhost.
url = "http://localhost:12257/v1/chat/completions"

# HTTP headers sent with every request to `url`.
# NOTE(review): the token is hard-coded; if it is ever a real credential,
# load it from the environment instead -- confirm.
headers = {"Content-Type": "application/json", "Authorization": "token-abc123"}

# Model identifier placed in the "model" field of each request payload.
model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct"

In [3]:
def get_valence_classification_prompt(opening_statement, question):
    """Build the chat messages asking the model to rate a question's valence.

    Args:
        opening_statement: Text of the advocate's opening statement.
        question: Text of the justice's question to classify.

    Returns:
        A two-element list of chat messages: a system message carrying the
        instructions, the five-point label scale, the required JSON output
        format and one worked example, followed by a user message containing
        the given opening statement and question.
    """
    # The scale description, the bullet list of labels and the worked example
    # must agree with each other, and the example must itself be valid JSON,
    # because the model is told to answer in exactly that format.
    system_prompt = """You are an expert assistant trained to classify the valence, or sentiment, of the questions asked by justices during oral arguments. Your task is to identify the competitiveness of a given question based on the advocate's opening statement and the text of the question itself.

        ### Instructions:
        During oral arguments, Supreme Court justices will often have an opinion on the case. This opinion
        may be influenced by a Justice's ideological predisposition and may influence the questions that they ask
        an advocate. For example, if a petitioner made an opening statement appealing to progressive values,
        a progressive justice may ask cooperative questions that allow the petitioner to present his argument more
        in-depth. On the other hand, that same progressive justice may ask the other-side-championing respondent
        more competitive questions that critique the respondent's opening statement because the respondent's argument is
        less aligned with the justice's opinion.

        Your task, given an opening statement of an advocate (either the petitioner or respondent) and a justice's
        question is to classify the degree of cooperativeness/competitiveness of justice's question with regards to
        an advocate's opening statement.

        Your output should follow a likert scale with your classifications ranging from "Very Competitive" to "Very Supportive."
        More specifically, you should classify a question as either:
        - "Very Competitive": The question directly critiques the points of the opening statement.
        - "Competitive": The question tries to critique the points of the opening statement.
        - "Neutral": The question neither critiques nor supports the opening statement.
        - "Supportive": The question tries to support the points in the opening statement.
        - "Very Supportive": The question directly supports the points of the opening statement.

        ### Output format:
        Your response must follow this JSON format:
        {
        "classification": "<Category Name>",
        "reasoning": "<A brief explanation for the classification>"
        }


        ### Example:
        Opening Statement: "Your Honors, we contend that the statute should be interpreted in light of its original intent, which clearly establishes a narrow scope of application to avoid overreach."
        Question: "If we accept your interpretation, how would it apply to cases involving modern technologies not contemplated when the statute was written?"

        ### Response:
        {
        "classification": "Competitive",
        "reasoning": "The question challenges the advocate's statement. It requests more information. If the question directly criticized the advocate's point ('This seems incorrect'), it would be 'Very Competitive,' but because it only requests more information, the question is 'Competitive.'"
        }
    """

    user_prompt = f"""### Your Task:
        Opening Statement: {opening_statement}
        Question: {question}

        ### Response:
    """

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    return messages

In [4]:
def get_model_response(messages, timeout=None):
    """POST a chat-completion request to the inference server.

    Args:
        messages: Chat messages to send (a list of role/content dicts).
        timeout: Optional timeout in seconds forwarded to requests.post.
            The default (None) waits indefinitely, matching the previous
            behaviour.

    Returns:
        The requests.Response object for a call that did not return an
        HTTP error status.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    payload = {
        "model": model_name,
        "messages": messages,
    }

    response = requests.post(
        url, data=json.dumps(payload), headers=headers, timeout=timeout
    )
    # Fail here with the HTTP status instead of later with an opaque
    # KeyError/JSON error when an error body is read as if it were a completion.
    response.raise_for_status()
    return response

def parse_response(response):
    """Pull the assistant's message text out of a chat-completion response.

    The response body is decoded as UTF-8 and parsed as JSON; the `content`
    of the first choice's message is returned as-is. The returned text is not
    itself parsed or validated as JSON here.
    """
    body = json.loads(response.content.decode('utf-8'))
    first_choice = body['choices'][0]
    return first_choice['message']['content']

In [5]:
def classify_questions_valence(opening_statement, question):
    """Run one opening-statement/question pair through the model.

    Builds the prompt messages, sends them to the server and returns the
    content that parse_response extracts from the reply.
    """
    prompt_messages = get_valence_classification_prompt(opening_statement, question)
    return parse_response(get_model_response(prompt_messages))

In [20]:
# Directory the question CSVs are read from.
input_dir = "../datasets/2024_questions/with_qid"
# Directory the valence-classification CSVs are written to.
out_dir = "../datasets/llm_outputs/valence"

### Run on LLM-generated questions

In [None]:
# The second "/"-separated segment of the model id names the question file.
model_suffix = model_name.split('/')[1]
input_fp = f'{input_dir}/2024_llm_questions_{model_suffix}.csv'

df_llm = pd.read_csv(filepath_or_buffer=input_fp)
df_llm.head()

In [None]:
# GENERATE FOR ALL
def _llm_row_valence(row):
    """Classify one row's question against that row's opening statement."""
    return classify_questions_valence(row['opening_statement'], row['question_text'])

# One model call per row; the raw reply text is stored in a new column.
df_llm['valence_classification_raw'] = df_llm.apply(_llm_row_valence, axis=1)
df_llm.head()

Save file:

In [None]:
# Write the results for the LLM-generated questions (df_llm). The previous
# code wrote df_coherent here, which is a different frame that is only
# created further down the notebook.
out_fp = f'{out_dir}/2024_llm_questions_{model_suffix}_valence_classification.csv'
df_llm.to_csv(out_fp, index=False)

### Run on actual questions

#### Get labels for 2024 'coherent' questions

In [14]:
# Questions carrying a coherence `label` column.
input_fp = f'{input_dir}/2024_all_questions_coherence_labeled_Meta-Llama-3.1-70B-Instruct_qid.csv'

df = pd.read_csv(filepath_or_buffer=input_fp)
df.head()

Unnamed: 0.1,Unnamed: 0,question_id,transcript_id,question_addressee,justice,question_text,opening_statement,full_text,label
0,0,q_fb9deabd,2024.23-621-t01,petitioner,Clarence Thomas,You --can a consent decree or a default judgm...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",incoherent
1,1,q_dd1235f1,2024.23-621-t01,petitioner,Clarence Thomas,But I thought your argument hinged on a court...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent
2,2,q_efc9f721,2024.23-621-t01,petitioner,"John G. Roberts, Jr.",What do you do with the formulation by your f...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",incoherent
3,3,q_e843e146,2024.23-621-t01,petitioner,Elena Kagan,"Well, it's -- it's true that it's only a lik...",<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent
4,4,q_e052c4b2,2024.23-621-t01,petitioner,Ketanji Brown Jackson,But it's not that determination that's making...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent


In [15]:
# Keep only rows labelled 'coherent'; copy so that adding columns later
# operates on an independent frame rather than a slice of `df`.
is_coherent = df['label'].eq('coherent')
df_coherent = df.loc[is_coherent].copy()

In [9]:
# # # test on subsample
# df_new = df_coherent.head(2).copy()
# df_new['valence_classification_raw'] = df_new.apply(
#     lambda row: classify_questions_valence(row['opening_statement'], row['question_text']), axis=1
# )
# df_new

Unnamed: 0.1,Unnamed: 0,question_id,transcript_id,question_addressee,justice,question_text,opening_statement,full_text,label,valence_classification_raw
1,1,q_dd1235f1,2024.23-621-t01,petitioner,Clarence Thomas,But I thought your argument hinged on a court...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent,"{\n ""classification"": ""Competitive"",\n ..."
3,3,q_e843e146,2024.23-621-t01,petitioner,Elena Kagan,"Well, it's -- it's true that it's only a lik...",<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent,"{\n""classification"": ""Very Competitive"",\n""rea..."


In [17]:
# GENERATE FOR ALL
df_coherent['valence_classification_raw'] = df_coherent.apply(
    lambda row: classify_questions_valence(row['opening_statement'], row['question_text']), axis=1
)
df_coherent.head()

Unnamed: 0.1,Unnamed: 0,question_id,transcript_id,question_addressee,justice,question_text,opening_statement,full_text,label,valence_classification_raw
1,1,q_dd1235f1,2024.23-621-t01,petitioner,Clarence Thomas,But I thought your argument hinged on a court...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent,"{\n ""classification"": ""Competitive"",\n ""reas..."
3,3,q_e843e146,2024.23-621-t01,petitioner,Elena Kagan,"Well, it's -- it's true that it's only a lik...",<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent,"{\n""classification"": ""Very Competitive"",\n""rea..."
4,4,q_e052c4b2,2024.23-621-t01,petitioner,Ketanji Brown Jackson,But it's not that determination that's making...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent,"{\n ""classification"": ""Competitive"",\n ""..."
5,5,q_6a41e1e0,2024.23-621-t01,petitioner,Ketanji Brown Jackson,When you think about the difference between m...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent,"{\n""classification"": ""Competitive"",\n""reasonin..."
7,7,q_8ddfdd01,2024.23-621-t01,petitioner,Ketanji Brown Jackson,But didn't Sole open -- leave open that --th...,<speaker>Erika L. Maley</speaker><text> Mr. Ch...,"<speaker>John G. Roberts, Jr.</speaker><text> ...",coherent,"{\n""classification"": ""Competitive"",\n""reasonin..."


Save file:

In [21]:
# Persist the coherent-question classifications without the index column.
out_fp = f'{out_dir}/2024_coherent_valence_classification.csv'
df_coherent.to_csv(path_or_buf=out_fp, index=False)

#### Get labels for 2024 'incoherent' questions

In [None]:
# df = pd.read_csv(input_fp)
# df_incoherent = df[df['label'] == 'incoherent']
# df_incoherent = df_incoherent.copy()
# df_incoherent.head()