In [1]:
# Data manipulation
import os
import pandas as pd
import csv
import json
import jsonlines as jl
from pathlib import Path
from dotenv import load_dotenv
from typing import List
import re

# Machine Learning
import torch
import torch.nn as nn
import tensorflow as tf
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

# Visualization
import seaborn as sns
import matplotlib.pyplot as plt

# Transformers and Langchain
from pydantic import ValidationError, BaseModel, Field

# API and utility
from huggingface_hub import login
from together import Together
import time
from tqdm import tqdm
import accelerate

# Load environment variables from the project's .env file. The original
# absolute path is tried first so the existing setup keeps working; when
# nothing is loaded from it (e.g. on another machine), fall back to
# python-dotenv's default lookup of a `.env` file.
if not load_dotenv('/Users/guida/llm_argument_tasks/.env'):
    load_dotenv()

api_key = os.environ.get('TOGETHER_API_KEY')
# The key is deliberately NOT printed: notebook outputs are saved with the
# file and would leak the secret to anyone who can read the notebook.
if not api_key:
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; define it in the environment or in a .env file."
    )
client = Together(api_key=api_key)

[REDACTED: a TOGETHER_API_KEY value was printed here; rotate the key and clear this output]


In [2]:
def get_llm(model_type):
    """Send a chat-completion request for the model family named by ``model_type``.

    Only ``"llama"`` is supported. It maps to
    ``meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo`` and is requested through
    the module-level ``client`` with fixed sampling settings and an empty
    ``messages`` list.

    Args:
        model_type: Short name of the model family (currently only ``"llama"``).

    Returns:
        The object returned by ``client.chat.completions.create``.

    Raises:
        ValueError: If ``model_type`` is not a supported name. Previously the
            function fell through and returned ``None`` without any signal.
    """
    # Guard clause: fail loudly instead of handing callers an implicit None.
    if model_type != "llama":
        raise ValueError(
            f"Unsupported model_type {model_type!r}; supported values: 'llama'"
        )

    return client.chat.completions.create(
        model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        max_tokens=512,
        messages=[],
        temperature=0.7,
        top_p=0.7,
        top_k=50,
        repetition_penalty=1,
        stop=["<|eot_id|>"]
    )
# NOTE(review): this issues a completion request with an empty `messages`
# list as soon as the cell runs, and `llm` is not referenced again in the
# visible cells - confirm it is still needed.
llm = get_llm("llama")

In [3]:
# Pydantic model for one classification result. Its JSON schema
# (`model_json_schema()`) is what classify_text passes as the
# `response_format` schema, so the field names and descriptions below are
# part of the request sent to the model.
class ArgumentClassification(BaseModel):
    # Text of the comment that was analyzed.
    comment: str = Field(description="The text of the comment being analyzed")

    # The argument the comment was checked against.
    argument: str = Field(description="The argument being checked in the comment")

    # 1 when the comment uses the argument, 0 otherwise (per the field description).
    label: int = Field(description="The label associated with the argument (0 or 1)")

In [4]:
def classify_text(messages: List[dict], model: str = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo") -> dict:
    """Ask the chat model to classify one comment and return the validated result.

    The request uses JSON mode with the ``ArgumentClassification`` schema. The
    reply is parsed and then validated against that same schema, so a missing
    field or a non-integer ``label`` is rejected here instead of being written
    to the results file.

    Args:
        messages: Chat messages (``{"role": ..., "content": ...}`` dicts) to send.
        model: Model identifier passed to the Together client. Defaults to the
            model this function was originally hard-wired to.

    Returns:
        A dict with the ``ArgumentClassification`` fields ``comment``,
        ``argument`` and ``label``.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        ValueError: If the reply has no content.
        pydantic.ValidationError: If the JSON does not match the schema.
    """
    extract = client.chat.completions.create(
        messages=messages,
        model=model,
        response_format={
            "type": "json_object",
            "schema": ArgumentClassification.model_json_schema(),
        },
    )

    raw_content = extract.choices[0].message.content
    if raw_content is None:
        # json.loads(None) would raise an opaque TypeError; name the real problem.
        raise ValueError("Model returned a message without content")

    # Parse first so malformed JSON still surfaces as json.JSONDecodeError,
    # which callers catch explicitly.
    payload = json.loads(raw_content)
    # Validate against the schema that was requested and return a plain dict.
    return ArgumentClassification.model_validate(payload).model_dump()

In [5]:
def argument_identification(comment_text: str, argument: str, topic) -> List[dict]:
    """Build the chat messages that ask whether a comment uses a given argument.

    Args:
        comment_text: The comment to analyze; sent as the user message.
        argument: The argument to look for; interpolated into the system prompt.
        topic: The discussion topic; interpolated into the system prompt.

    Returns:
        A two-element list: the system message carrying the instructions,
        followed by the user message carrying ``comment_text``.
    """
    # The prompt text (including its indentation and blank lines) is part of
    # what the model sees, so it is reproduced verbatim.
    system_prompt = f"""
        You are an AI assistant tasked with analyzing comments about {topic} in relation to a specific argument. You need to:
        Identify if the comment makes use the given argument. 
        If the comment makes use or refer to the argument, assign the label 1. Otherwise, assign the label 0.

        The argument to analyze is: {argument}
        
        Provide your response in the following JSON format:
        {{
            "comment": "full text of the comment",
            "argument": "the argument being analyzed",
            "label": "the label for the use of the argument in the comment"
        }}
        
        Do NOT add text or explanations.
        
        Analyze the following comment in relation to the given argument:
        """

    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": comment_text}
    return [system_message, user_message]

## Lists of arguments

In [6]:
# Candidate arguments per discussion topic. Each key is the
# `argument_list_name` accepted by process_dataframe_comments; each value is
# the list of argument statements every comment is checked against.
argument_lists = {
    # Abortion
    "arguments_ab": [
        "Abortion is a woman's right.",
        "Rape victims need it to be legal.",
        "A fetus is not a human yet, so it's okay to abort.",
        "Abortion should be allowed when a mother's life is in danger.",
        "Unwanted babies are ill-treated by parents and/or not always adopted.",
        "Birth control fails at times and abortion is one way to deal with it.",
        "Abortion is not murder.",
        "Mother is not healthy/financially solvent.",
        "Put baby up for adoption.",
        "Abortion kills a life.",
        "An unborn baby is a human and has the right to live.",
        "Be willing to have the baby if you have sex.",
        "Abortion is harmful for women.",
    ],
    # Gay rights / gay marriage
    "arguments_gm": [
        "Gay marriage is like any other marriage.",
        "Gay people should have the same rights as straight people.",
        "Gay parents can adopt and ensure a happy life for a baby.",
        "People are born gay.",
        "Religion should not be used against gay rights.",
        "Religion does not permit gay marriages.",
        "Gay marriages are not normal/against nature.",
        "Gay parents can not raise kids properly.",
        "Gay people have problems and create social issues.",
    ],
    # Marijuana
    "arguments_ma": [
        "Not addictive",
        "Used as a medicine for its positive effects",
        "Legalized marijuana can be controlled and regulated by the government",
        "Prohibition violates human rights",
        "Does not cause any damage to our bodies",
        "Damages our bodies",
        "Responsible for brain damage",
        "If legalized, people will use marijuana and other drugs more",
        "Causes crime",
        "Highly addictive",
    ],
    # Obama presidency
    "arguments_oba": [
        "Fixed the economy",
        "Ending the wars",
        "Better than the republican candidates",
        "Makes good decisions/policies",
        "Has qualities of a good leader",
        "Ensured better healthcare",
        "Executed effective foreign policies",
        "Created more jobs",
        "Destroyed our economy",
        "Wars are still on",
        "Unemployment rate is high",
        "Healthcare bill is a failure",
        "Poor decision-maker",
        "We have better republicans than Obama",
        "Not eligible as a leader",
        "Ineffective foreign policies",
    ],
}

In [7]:
def process_dataframe_comments(df: pd.DataFrame, topic: str, argument_list_name: str, text_column: str = 'text') -> None:
    """Classify every unique comment against every argument and save the results.

    For each distinct, non-missing value in ``df[text_column]``, a prompt is
    built per argument with ``argument_identification`` and sent through
    ``classify_text``. Failed classifications are reported on stdout and
    skipped, so one bad response does not abort the run.

    The results are written to ``yru_{argument_list_name}_identification.json``
    in the current working directory as a list with one entry per comment;
    each entry is the list of classification dicts that succeeded for that
    comment.

    Args:
        df: DataFrame holding the comments.
        topic: Topic name interpolated into the prompt.
        argument_list_name: Key into ``argument_lists`` selecting the arguments.
        text_column: Name of the column containing the comment text.

    Returns:
        None. The output is the JSON file; nothing is returned (the previous
        ``dict[str, List]`` annotation did not match the implementation).

    Raises:
        KeyError: If ``argument_list_name`` is not in ``argument_lists`` or
            ``text_column`` is not a column of ``df``.
    """
    arguments = argument_lists[argument_list_name]
    # Missing values (NaN) are not comments; dropping them avoids sending
    # "nan" to the model for every argument.
    unique_comments = df[text_column].dropna().unique()
    results = []

    for comment in unique_comments:
        # str() keeps the error messages safe even if a cell is not a string.
        preview = str(comment)[:50]
        comment_results = []

        for argument in arguments:
            messages = argument_identification(comment, argument, topic)

            try:
                comment_results.append(classify_text(messages))
            except json.JSONDecodeError as e:
                print(f"JSONDecodeError for comment: {preview}... - Error: {e}")
            except Exception as e:
                # Deliberate best-effort: report the failure and move on to
                # the next argument rather than losing the whole run.
                print(f"An unexpected error occurred for comment: {preview}... - Error: {e}")

        results.append(comment_results)

    output_filename = f'yru_{argument_list_name}_identification.json'
    with open(output_filename, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

In [8]:
# Abortion comments: classify each unique comment against the abortion arguments.
topic = 'abortion'
argument_list_name = 'arguments_ab'
ab = pd.read_csv('../../clean_data/yru_abortion.csv')

process_dataframe_comments(
    df=ab,
    topic=topic,
    argument_list_name=argument_list_name,
)

In [None]:
# Marijuana comments: classify each unique comment against the marijuana arguments.
ma = pd.read_csv('../../clean_data/yru_marijuana.csv')
topic = 'marijuana'
argument_list_name = 'arguments_ma'

# Pass the marijuana frame loaded above; the previous call passed `ab`
# (the abortion frame), so the wrong comments were classified.
process_dataframe_comments(ma, topic, argument_list_name)

In [None]:
# Obama comments: classify each unique comment against the Obama arguments.
oba = pd.read_csv('../../clean_data/yru_obama.csv')
topic = 'obama presidency'
argument_list_name = 'arguments_oba'

# Pass the Obama frame loaded above; the previous call passed `ab`
# (the abortion frame), so the wrong comments were classified.
process_dataframe_comments(oba, topic, argument_list_name)

In [None]:
# Gay-rights comments: classify each unique comment against the gay-rights arguments.
gm = pd.read_csv('../../clean_data/yru_gayrights.csv')
topic = 'gay rights'
argument_list_name = 'arguments_gm'

# Pass the gay-rights frame loaded above; the previous call passed `ab`
# (the abortion frame), so the wrong comments were classified.
process_dataframe_comments(gm, topic, argument_list_name)