In [11]:
from openai import OpenAI
import os
import pandas as pd
from pydantic import BaseModel, Field
from tqdm import tqdm
import json
from random import sample
import jsonlines as jsonl
from openai import OpenAI


from together import Together
import time
from tqdm import tqdm
import accelerate
from dotenv import load_dotenv

# Load environment variables through python-dotenv
# (presumably from a local .env file — confirm where it is expected to live).
load_dotenv()

# .get() yields None when TOGETHER_API_KEY is unset; nothing here checks for that
# before the client is constructed.
api_key = os.environ.get('TOGETHER_API_KEY')
# Notebook-level Together client; classify_text sends its chat completion requests through it.
client = Together(api_key=api_key)


In [12]:
# Shape of the JSON object requested from the model: classify_text passes this
# model's JSON schema as the `schema` of its `response_format`.
# Documented with comments rather than a docstring on purpose: pydantic copies a
# model's docstring into the generated JSON schema, which would change what is
# sent to the API.
class ArgumentClassification(BaseModel):
    # Identifier the model is asked to echo back.
    id: str = Field(description="The exact ID of the comment being analyzed")
    # Binary decision. The description says 0 or 1, but the field itself accepts any int.
    label: int = Field(description="The label associated with the argument (0 or 1)")

In [13]:
# Collapses the raw `label` values (1-5) read from the shots CSV into the binary
# label shown in the few-shot examples: 3 becomes 0, every other value becomes 1.
# Applied in prep_fewshot_samples.
# NOTE(review): presumably 3 is the dataset's "argument not used" class — confirm.
label_mapping = {1: 1, 2: 1, 3: 0, 4: 1, 5:1}

In [14]:
def prep_fewshot_samples(samples_file, topic, n, seed=None, label_map=None):
    """Build the few-shot example text that is embedded in the classification prompt.

    ``n`` ids are drawn at random from the ``id`` column of ``samples_file``.
    The matching rows are rendered in file order: a ``Comment:`` header followed
    by one ``Argument``/``Label`` pair per row. A new header is emitted whenever
    the comment text changes, so arguments are never listed under a comment they
    were not annotated for. For a shots file that holds a single comment the
    text is identical to what was produced before.

    Args:
        samples_file: Path of a CSV with the columns ``id``, ``comment_text``,
            ``argument_text`` and ``label``.
        topic: Unused; kept so existing calls keep working.
        n: Number of ids to draw. ``0`` yields an empty string.
        seed: Optional seed for a private random generator, making the draw
            reproducible. With ``None`` the shared ``random.sample`` is used,
            exactly as before.
        label_map: Optional mapping from the raw ``label`` value to the label
            shown in the prompt. Defaults to the notebook-level ``label_mapping``.

    Returns:
        The rendered examples as one string. The drawn ids and the rendered
        text are also printed so the notebook keeps a record of them.

    Raises:
        ValueError: If ``n`` is negative or larger than the number of ids.
        KeyError: If a raw label is missing from the mapping.
    """
    # Local import: only ``sample`` is imported from ``random`` at file level.
    from random import Random

    if label_map is None:
        label_map = label_mapping

    shots = pd.read_csv(samples_file)
    ids = shots['id'].to_list()
    if n < 0 or n > len(ids):
        raise ValueError(
            f"Cannot draw {n} few-shot examples from {len(ids)} available ids in {samples_file}"
        )
    if n == 0:
        return ""

    sampled = sample(ids, n) if seed is None else Random(seed).sample(ids, n)
    print(sampled)
    chosen = shots[shots['id'].isin(sampled)]

    parts = []
    previous_comment = None
    # ``i`` is the row's index label in the CSV, not a running count, so the
    # argument numbers in the prompt can have gaps.
    for i, row in chosen.iterrows():
        comment = row['comment_text']
        if not parts or comment != previous_comment:
            parts.append(
                f"Comment: {comment}\n The following arguments are present (1) or not present (0) in this comment:\n"
            )
            previous_comment = comment
        parts.append(f" Argument {i}: {row['argument_text']}\n")
        parts.append(f" Label: {label_map[row['label']]}\n\n")

    output = "".join(parts)
    print(output)
    return output


In [15]:
def _parse_classification(raw_content, expected_id):
    """Turn the model's JSON reply into a clean ``{"id", "label"}`` record."""
    parsed = json.loads(raw_content)
    if not isinstance(parsed, dict):
        raise ValueError(f"Expected a JSON object, got {type(parsed).__name__}")

    raw_label = parsed.get("label")
    if isinstance(raw_label, str):
        raw_label = raw_label.strip()
    # The prompt template shows the label as a quoted string, so "0"/"1" are
    # accepted alongside the integers and normalised to int.
    if raw_label not in (0, 1, "0", "1"):
        raise ValueError(f"Label must be 0 or 1, got {raw_label!r}")

    # Return the id that was asked about rather than the model's echo of it, so
    # a mangled echo cannot break the join with the input rows.
    return {"id": expected_id, "label": int(raw_label)}


def classify_text(id: str, comment_text: str, topic: str, argument: str, samples: str) -> dict:
    """Ask the model whether ``comment_text`` makes use of ``argument``.

    Two user messages are sent through the notebook-level ``client``: the
    instructions (with the argument, the few-shot ``samples`` and the expected
    JSON shape) and then the comment itself. The request uses temperature 0,
    top_k 1 and top_p 1, and asks for a JSON object matching the
    ``ArgumentClassification`` schema.

    Args:
        id: Identifier of the row being classified.
        comment_text: The comment to analyse.
        topic: Topic phrase inserted into the instructions.
        argument: The argument whose use is being checked.
        samples: Few-shot example text inserted into the instructions.

    Returns:
        ``{"id": id, "label": 0 or 1}``, where ``id`` is the value passed in
        and ``label`` is always an ``int``.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        ValueError: If the reply is not a JSON object or its label is not 0/1.
    """
    # NOTE(review): the template below shows "label" as a quoted string while the
    # schema declares an int. The prompt is left untouched so earlier runs stay
    # comparable; the reply is normalised after the call instead.
    extract = client.chat.completions.create(
        messages=[
            {"role": "user", "content": f"""
            Analyze the given comment in relation to a specific argument about {topic}. You need to:
            Identify if the comment makes use of the given argument. If it does, assign the label 1. If it does not, assign the label 0.
            Do NOT use any other label.
            Do NOT include the comment or the argument in the response.
            
            The argument to analyze is: {argument}
            
            Provide your response in the following JSON format:
            
            {{
                "id": "{id}",
                "label": "the label for the use of the argument in the comment"
            }}
            
            Some examples:

            {samples}


            Analyze the following comment in relation to the given argument:
            """},
        {"role": "user", "content": comment_text},
        ],
        model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        temperature=0,
        top_k=1,
        top_p=1,
        response_format={
            "type": "json_object",
            "schema": ArgumentClassification.model_json_schema(),
        }
    )

    return _parse_classification(extract.choices[0].message.content, id)

In [16]:
def process_dataframe_comments(df: pd.DataFrame, topic: str, file_name: str, samples: str, n: int):
    """Classify every row of ``df`` and write one JSON line per row.

    Results go to ``comarg_{file_name}_identification_llama_{n}shot.jsonl``,
    opened in write mode, so an existing file of that name is replaced.

    Each row needs the columns ``id``, ``comment_text`` and ``argument_text``.
    When classification of a row fails, the error is printed and a fallback
    record with label 0 is written so the output keeps one line per input row.
    NOTE(review): such fallback rows cannot be told apart from genuine 0
    predictions in the output file.

    Args:
        df: Rows to classify.
        topic: Topic phrase passed on to ``classify_text``.
        file_name: Short tag used in the output file name.
        samples: Few-shot example text passed on to ``classify_text``.
        n: Number of shots; only used in the output file name.
    """
    out_path = f'comarg_{file_name}_identification_llama_{n}shot.jsonl'
    with jsonl.open(out_path, mode='w') as writer:
        for _, row in tqdm(df.iterrows(), desc="Processing comments", unit="comment", total=len(df)):
            comment_id = row['id']
            comment_text = row['comment_text']
            argument = row['argument_text']
            fallback = {"id": comment_id, "label": 0}
            # str(): a missing comment arrives as a float NaN, which cannot be
            # sliced. Slicing it inside an except block used to abort the run.
            preview = str(comment_text)[:50]
            try:
                classification = classify_text(
                    id=comment_id,
                    comment_text=comment_text,
                    topic=topic,
                    argument=argument,
                    samples=samples
                )
                writer.write(classification)
            except json.JSONDecodeError as e:
                print(f"JSONDecodeError for comment: {preview}... - Error: {e}")
                writer.write(fallback)
            except Exception as e:
                # Deliberately broad: one failed request must not end a long run.
                print(f"An unexpected error occurred for comment: {preview}... - Error: {e}")
                writer.write(fallback)
        

## Gay Marriage

In [17]:
# 5-shot run on the gay-marriage (GM) data: load the rows to classify, build the
# few-shot text from the separate "shots" CSV, then classify every row.
# NOTE(review): the frame is named `ab` although it holds the GM file — looks like
# a leftover name; confirm nothing else relies on it before renaming.
# NOTE(review): absolute, machine-specific paths; the cell will not run elsewhere
# without editing them.
ab = pd.read_csv('/Users/guida/llm_argument_tasks/clean_data/GM_all_arguments_main.csv')
topic = 'gay marriage'
file_name = 'gm'
n = 5
# The shots are drawn at random without a seed, so the examples differ between runs.
samples = prep_fewshot_samples('/Users/guida/llm_argument_tasks/clean_data/GM_all_arguments_shots.csv', topic, n)

# Writes comarg_gm_identification_llama_5shot.jsonl relative to the working directory.
process_dataframe_comments(ab, topic, file_name, samples, n)

['134arg6', '134arg5', '134arg4', '134arg7', '134arg1']
Comment: Same sex marriages do in their most basic form, surmount to treasonous and sedicious behavior among a few member of society.  Participation therein does not produce an offspring of such that can shoulder defense of any state, our federalist republic form of government, constituent representation, taxation, or the ability to pay down the national debt.  In short, it is a drain on society that the social order of what makes states, states; nations, nations and governments, governments.  As such, we can not allow it to stand in anyrecognizable or protected form.
 The following arguments are present (1) or not present (0) in this comment:
 Argument 0: Gay couples can declare their union without resort to marriage
 Label: 0

 Argument 3: It is discriminatory to refuse gay couples the right to marry
 Label: 0

 Argument 4: Major world religions are against gay marriages
 Label: 0

 Argument 5: Marriage is about more than procre

Processing comments: 100%|██████████| 1379/1379 [26:49<00:00,  1.17s/comment] 


In [18]:
# 5-shot run on the "Under God in the Pledge" (UGIP) data: load the rows to
# classify, build the few-shot text from the separate "shots" CSV, then classify
# every row.
# NOTE(review): absolute, machine-specific paths; the cell will not run elsewhere
# without editing them.
ugip = pd.read_csv('/Users/guida/llm_argument_tasks/clean_data/UGIP_all_arguments_main.csv')
topic = 'whether "Under God" should appear in the US Pledge of Allegiance'
file_name = 'ugip'
n = 5
# The shots are drawn at random without a seed, so the examples differ between runs.
samples = prep_fewshot_samples('/Users/guida/llm_argument_tasks/clean_data/UGIP_all_arguments_shots.csv', topic, n)

# Writes comarg_ugip_identification_llama_5shot.jsonl relative to the working directory.
process_dataframe_comments(ugip, topic, file_name, samples, n)

['414721704arg5', '414721704arg4', '414721704arg3', '414721704arg1', '414721704arg2']
Comment: Saying under God does not have to refer to a Christian God, or even a strictly religious God. I find it uplifting to state that we are all united under a greater power, whether it be that of a deity of doctrine or a collective, spiritual sense of our country's connectedness. Under God can mean different things to different people, but it is a good thing to recognize a power and purpose that is greater than any one individual.
 The following arguments are present (1) or not present (0) in this comment:
 Argument 0: Separation of state and religion
 Label: 0

 Argument 1: Removing under god would promote religious tolerance
 Label: 0

 Argument 2: Under God  is part of American tradition and history
 Label: 0

 Argument 3: Likely to be seen as a state sanctioned condemnation of religion
 Label: 0

 Argument 4: Implies ultimate power on the part of the state
 Label: 0




Processing comments: 100%|██████████| 2094/2094 [44:12<00:00,  1.27s/comment]


In [19]:
# 1-shot run on the gay-marriage (GM) data; only `n` differs from the 5-shot GM cell.
# The CSV is read again and the notebook-level names (`ab`, `topic`, `file_name`,
# `n`, `samples`) are reassigned.
# NOTE(review): absolute, machine-specific paths; the cell will not run elsewhere
# without editing them.
ab = pd.read_csv('/Users/guida/llm_argument_tasks/clean_data/GM_all_arguments_main.csv')
topic = 'gay marriage'
file_name = 'gm'
n = 1
# A single id is drawn at random without a seed, so the example differs between runs.
samples = prep_fewshot_samples('/Users/guida/llm_argument_tasks/clean_data/GM_all_arguments_shots.csv', topic, n)

# Writes comarg_gm_identification_llama_1shot.jsonl relative to the working directory.
process_dataframe_comments(ab, topic, file_name, samples, n)

['134arg2']
Comment: Same sex marriages do in their most basic form, surmount to treasonous and sedicious behavior among a few member of society.  Participation therein does not produce an offspring of such that can shoulder defense of any state, our federalist republic form of government, constituent representation, taxation, or the ability to pay down the national debt.  In short, it is a drain on society that the social order of what makes states, states; nations, nations and governments, governments.  As such, we can not allow it to stand in anyrecognizable or protected form.
 The following arguments are present (1) or not present (0) in this comment:
 Argument 1: Gay couples should be able to take advantage of the fiscal and legal benefits of marriage
 Label: 1




Processing comments: 100%|██████████| 1379/1379 [28:31<00:00,  1.24s/comment] 


In [20]:
# 1-shot run on the "Under God in the Pledge" (UGIP) data; only `n` differs from
# the 5-shot UGIP cell. The CSV is read again and the notebook-level names
# (`ugip`, `topic`, `file_name`, `n`, `samples`) are reassigned.
# NOTE(review): absolute, machine-specific paths; the cell will not run elsewhere
# without editing them.
ugip = pd.read_csv('/Users/guida/llm_argument_tasks/clean_data/UGIP_all_arguments_main.csv')
topic = 'whether "Under God" should appear in the US Pledge of Allegiance'
file_name = 'ugip'
n = 1
# A single id is drawn at random without a seed, so the example differs between runs.
samples = prep_fewshot_samples('/Users/guida/llm_argument_tasks/clean_data/UGIP_all_arguments_shots.csv', topic, n)

# Writes comarg_ugip_identification_llama_1shot.jsonl relative to the working directory.
process_dataframe_comments(ugip, topic, file_name, samples, n)

['414721704arg2']
Comment: Saying under God does not have to refer to a Christian God, or even a strictly religious God. I find it uplifting to state that we are all united under a greater power, whether it be that of a deity of doctrine or a collective, spiritual sense of our country's connectedness. Under God can mean different things to different people, but it is a good thing to recognize a power and purpose that is greater than any one individual.
 The following arguments are present (1) or not present (0) in this comment:
 Argument 1: Removing under god would promote religious tolerance
 Label: 0




Processing comments: 100%|██████████| 2094/2094 [45:28<00:00,  1.30s/comment]
