In [44]:
from openai import OpenAI
import os
import pandas as pd
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from typing import List
from tqdm import tqdm
import json, csv
load_dotenv('/Users/guida/llm_argument_tasks/.env')

api_key = os.environ.get('OPENAI_API_KEY')

In [28]:
class ArgumentClassification(BaseModel):
    id: str = Field(description="The ID the comment being analyzed")    
    label: int = Field(description="The label associated with the argument (0 or 1)") 

## Gay Marriage

In [33]:
arguments_gm = [
    "It is discriminatory to refuse gay couples the right to marry",
    "Gay couples should be able to take advantage of the fiscal and legal benefits of marriage",
    "Marriage is about more than procreation, therefore gay couples should not be denied the right to marry due to their biology",
    "Gay couples can declare their union without resort to marriage",
    "Gay marriage undermines the institution of marriage, leading to an increase in out of wedlock births and divorce rates",
    "Major world religions are against gay marriages",
    "Marriage should be between a man and a woman"
]

In [42]:
def classify_text_gpt(id: str, comment_text: str, argument: str) -> dict:
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[
        {"role": "system", "content": f"""
        Analyze the given comment about gay marriage in relation to a specific argument. Your need to:
        Identify if the comment makes use the given argument. If it does, assign the label 1. If it does not, assign the label 0.
        Do NOT use any other label.
        Do NOT include the comment or the argument in the response.
        
        The argument to analyze is: {argument}
        
        Provide your response in the following JSON format:
        
        {{
            "id": "{id}",
            "label": "the label for the use of the argument in the comment"
        }}
        
        Analyze the following comment in relation to the given argument
        """},
        {"role": "user", "content": comment_text},
        ],
        response_format=ArgumentClassification,
    )

    return completion.choices[0].message.content

In [45]:
gm = pd.read_csv('../../clean_data/GM_structured.csv')

def process_comments_with_arguments(df: pd.DataFrame) -> List[dict]:
    results = []
    for idx, row in tqdm(df.iterrows(), total=len(df), desc="Processing comments"):
        comment_id = row['id'] 
        comment_text = row['comment_text']  
        argument_text = row['argument_text']

        try:
            classification = classify_text_gpt(comment_id, comment_text, argument_text)
            results.append(classification)

        except json.JSONDecodeError as e:
            print(f"JSONDecodeError for comment: {comment_text[:50]}... - Error: {e}")
            continue

        except Exception as e:
            print(f"An unexpected error occurred for comment: {comment_text[:50]}... - Error: {e}")
            continue

    return results

classifications = process_comments_with_arguments(gm)

output_file = 'llm_argument_tasks/output_files/gpt4o-mini/comarg_gm_argument_identification.json'
with open(output_file, 'w') as f:
    json.dump(classifications, f, indent=2)

Processing comments: 100%|██████████| 1285/1285 [15:42<00:00,  1.36it/s]


FileNotFoundError: [Errno 2] No such file or directory: 'llm_argument_tasks/output_files/gpt40-mini/comarg_gm_argument_identification.json'

In [49]:
output_file = 'comarg_gm_argument_identification.json'
with open(output_file, 'w') as f:
    json.dump(classifications, f, indent=2)

## Under God in Pledge

In [50]:
arguments_ugip = [
    "Likely to be seen as a state-sanctioned condemnation of religion",
    "The principles of democracy regulate that the wishes of American Christians, who are a majority, are honored",
    "Under God is part of American tradition and history",
    "Implies ultimate power on the part of the state",
    "Removing under god would promote religious tolerance",
    "Separation of state and religion"
]

In [51]:
def classify_text_gpt(id: str, comment_text: str, argument: str) -> dict:
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[
        {"role": "system", "content": f"""
        Analyze the given comment about whether "Under God" should be included in the US Pledge of Alliance. Your need to:
        Identify if the comment makes use the given argument. If it does, assign the label 1. If it does not, assign the label 0.
        Do NOT use any other label.
        Do NOT include the comment or the argument in the response.
        
        The argument to analyze is: {argument}
        
        Provide your response in the following JSON format:
        
        {{
            "id": "{id}",
            "label": "the label for the use of the argument in the comment"
        }}
        
        Analyze the following comment in relation to the given argument
        """},
        {"role": "user", "content": comment_text},
        ],
        response_format=ArgumentClassification,
    )

    return completion.choices[0].message.content

In [55]:
ugip

Unnamed: 0,id,comment_text,argument_text,label
0,414721685arg1,"Simple, maybe I believe in Allah, or the flyin...",Separation of state and religion,3
1,414721685arg2,"Simple, maybe I believe in Allah, or the flyin...",Removing under god would promote religious tol...,4
2,414721685arg3,"Simple, maybe I believe in Allah, or the flyin...",Under God is part of American tradition and h...,3
3,414721643arg1,As an devout atheist I strongly oppose this. I...,Separation of state and religion,5
4,414721643arg3,As an devout atheist I strongly oppose this. I...,Under God is part of American tradition and h...,3
...,...,...,...,...
1008,414721954arg2,"We are a nation formed by men of faith, reflec...",Removing under god would promote religious tol...,3
1009,414721954arg3,"We are a nation formed by men of faith, reflec...",Under God is part of American tradition and h...,3
1010,414721680arg1,We should follow the Constitution and keep Chu...,Separation of state and religion,5
1011,414721680arg2,We should follow the Constitution and keep Chu...,Removing under god would promote religious tol...,3


In [57]:
ugip = pd.read_csv('../../clean_data/UGIP_structured.csv')

def process_comments_with_arguments(df: pd.DataFrame) -> List[dict]:
    results = []
    for idx, row in tqdm(df.iterrows(), total=len(df), desc="Processing comments"):
        comment_id = row['id'] 
        comment_text = row['comment_text']  
        argument_text = row['argument_text']

        try:
            classification = classify_text_gpt(comment_id, comment_text, argument_text)
            results.append(classification)

        except json.JSONDecodeError as e:
            print(f"JSONDecodeError for comment: {comment_text[:50]}... - Error: {e}")
            continue

        except Exception as e:
            print(f"An unexpected error occurred for comment: {comment_text[:50]}... - Error: {e}")
            continue

    return results

classifications = process_comments_with_arguments(ugip)

output_file = 'comarg_ugip_argument_identification_gpt.json'
with open(output_file, 'w') as f:
    json.dump(classifications, f, indent=2)

Processing comments:   0%|          | 0/1013 [00:00<?, ?it/s]

Processing comments: 100%|██████████| 1013/1013 [12:57<00:00,  1.30it/s]


In [69]:
input_ugip = '/Users/guida/llm_argument_tasks/output_files/gpt4o-mini/comarg_ugip_argument_identification.json'
output_ugip = '/Users/guida/llm_argument_tasks/output_files/gpt4o-mini/comarg_ugip_argument_identification.csv'

with open(input_gm, 'r') as f:
    data = json.load(f)
    data = [json.loads(item) for item in data]

with open(output_gm, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)

    writer.writerow(['comment_text', 'argument_text', 'label'])
    
    for item in data:
        writer.writerow([item['id'], item['label']])

In [68]:
input_gm = '/Users/guida/llm_argument_tasks/output_files/gpt4o-mini/comarg_gm_argument_identification.json'
output_gm = '/Users/guida/llm_argument_tasks/output_files/gpt4o-mini/comarg_gm_argument_identification.csv'

with open(input_gm, 'r') as f:
    data = json.load(f)
    data = [json.loads(item) for item in data]

with open(output_gm, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)

    writer.writerow(['comment_text', 'argument_text', 'label'])
    
    for item in data:
        writer.writerow([item['id'], item['label']])