In [1]:
# Data manipulation
import os
import pandas as pd
import csv
import json
import jsonlines as jl
from pathlib import Path
from dotenv import load_dotenv
from typing import List
import re

# Machine Learning
import torch
import torch.nn as nn
import tensorflow as tf
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

# Visualization
import seaborn as sns
import matplotlib.pyplot as plt

# Structured-output schemas (pydantic)
from pydantic import ValidationError, BaseModel, Field

# API and utility
from huggingface_hub import login
from together import Together
import time
from tqdm import tqdm
import accelerate


# Read a local .env file (when present) so the API key can be supplied without
# hardcoding it in the notebook.
load_dotenv()

# Shared Together client used by every request cell below. The key is looked up
# in the process environment; `api_key` is None when the variable is not set.
api_key = os.getenv('TOGETHER_API_KEY')
client = Together(api_key=api_key)

ModuleNotFoundError: No module named 'dotenv'

In [18]:
def get_llm(model_type, messages=None):
    """Send a chat-completion request for the given model family.

    Despite its name, this function does not return a reusable model handle:
    for ``model_type == "llama"`` it immediately calls
    ``client.chat.completions.create`` and returns that call's result.

    Args:
        model_type: Model family selector. Only ``"llama"`` is recognised.
        messages: Optional list of chat messages (dicts with ``"role"`` and
            ``"content"``) to send. Defaults to an empty list, which is what
            this function always sent before the parameter was added.

    Returns:
        Whatever ``client.chat.completions.create`` returns for ``"llama"``;
        ``None`` for any other ``model_type``.
    """
    if model_type != "llama":
        # Unrecognised families keep the historical behaviour of yielding None.
        return None

    return client.chat.completions.create(
        model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        max_tokens=512,
        messages=[] if messages is None else messages,
        temperature=0.8,
        # Generation is cut off when the model emits this token.
        stop=["<|eot_id|>"],
    )
# Runs at cell execution time: for "llama", get_llm sends a chat-completion
# request (with an empty message list here), so `llm` holds that call's return
# value rather than a reusable model object.
llm = get_llm("llama")

In [19]:
# Pydantic model describing the JSON object the LLM is asked to return.
# classify_text passes this model's JSON schema as the `schema` entry of the
# request's response_format.
# NOTE(review): documented with comments rather than a class docstring because
# pydantic appears to copy a class docstring into model_json_schema() as
# "description", which would alter the schema sent to the API — confirm before
# adding one.
class ArgumentClassification(BaseModel):
    # Text of the comment that was analysed.
    comment: str = Field(description="The text of the comment being analyzed")

    # The argument the comment was checked against.
    argument: str = Field(description="The argument being checked in the comment")

    # NOTE(review): typed as str even though the prompt asks for the label 1 or 0,
    # so consumers should presumably expect "1"/"0" strings — TODO confirm.
    label: str = Field(description="The label associated with the argument")

In [24]:
def classify_text(messages: List[dict]) -> dict:
    """Request a JSON-mode completion for ``messages`` and return it parsed.

    The request asks for a JSON object that follows the schema generated from
    ``ArgumentClassification``.

    Args:
        messages: Chat messages to send, as dicts with ``"role"`` and ``"content"``.

    Returns:
        The first choice's message content, decoded with ``json.loads``.

    Raises:
        json.JSONDecodeError: If the returned content is not valid JSON.
    """
    json_mode = {
        "type": "json_object",
        "schema": ArgumentClassification.model_json_schema(),
    }
    completion = client.chat.completions.create(
        model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        messages=messages,
        response_format=json_mode,
    )

    # Only the first choice is used.
    raw_reply = completion.choices[0].message.content
    return json.loads(raw_reply)

## GAY MARRIAGE

In [20]:
# Candidate arguments for the gay-marriage topic. process_unique_comments checks
# every comment against each entry, and argument_identification_gm interpolates
# the text verbatim into the system prompt — so editing a string changes the prompt.
arguments_gm = [
    "It is discriminatory to refuse gay couples the right to marry",
    "Gay couples should be able to take advantage of the fiscal and legal benefits of marriage",
    "Marriage is about more than procreation, therefore gay couples should not be denied the right to marry due to their biology",
    "Gay couples can declare their union without resort to marriage",
    "Gay marriage undermines the institution of marriage, leading to an increase in out of wedlock births and divorce rates",
    "Major world religions are against gay marriages",
    "Marriage should be between a man and a woman"
]

In [25]:
def argument_identification_gm(comment_text: str, argument: str, topic: str = "gay marriage") -> List[dict]:
    """Build the chat messages asking whether a comment uses a given argument.

    Args:
        comment_text: The comment to analyse; sent unchanged as the user message.
        argument: The argument to look for; interpolated into the system prompt.
        topic: Debate topic named in the system prompt. Defaults to
            ``"gay marriage"``, which yields exactly the original prompt, and lets
            the same builder serve other argument lists.

    Returns:
        A two-element list: the system message carrying the instructions and the
        expected JSON format, followed by the user message with the comment.
    """
    # The prompt text, including its indentation, is part of what the model sees.
    # NOTE(review): "makes use the given argument" is missing "of"; left untouched
    # because rewording the prompt can change model output.
    system_prompt = f"""
        You are an AI assistant tasked with analyzing a comment about {topic} in relation to a specific argument. You need to:
        Identify if the comment makes use the given argument. If it does, assign the label 1. If it does not, assign the label 0.
        Do NOT use any other label.
        Do NOT add additional text, nor explanations. 

        The argument to analyze is: {argument}
        
        Provide your response in the following JSON format:
        {{
            "comment": "full text of the comment",
            "argument": "the argument being analyzed",
            "label": "the label for the use of the argument in the comment"
        }}
        
        Analyze the following comment in relation to the given argument:
        """
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": comment_text},
    ]

In [33]:

!pwd

/home/guida/llm_argument_tasks/code/llama3


In [42]:
# Load the structured gay-marriage dataset (path is relative to the notebook's
# working directory).
gm = pd.read_csv('../../clean_data/GM_structured.csv')

# Keep each distinct comment text once so it is only sent for classification a
# single time per argument.
unique_comments = gm.loc[:, 'comment_text'].unique()
def process_unique_comments(unique_comments: List[str]) -> List[List[dict]]:
    """Classify every comment against every argument in ``arguments_gm``.

    One request is made per (comment, argument) pair. A failed request or an
    unparsable reply is reported with ``print`` and skipped, so a per-comment
    list may hold fewer entries than there are arguments.

    Args:
        unique_comments: Iterable of comment texts to classify.

    Returns:
        One inner list per input comment, in input order, holding the parsed
        classification dict for each argument that succeeded.
    """
    results = []
    for comment in unique_comments:
        # str() keeps the error messages usable for non-string cells (e.g. NaN
        # read from a CSV); slicing such a value would raise inside the handler
        # and abort the whole run.
        preview = str(comment)[:50]
        comment_results = []

        for argument in arguments_gm:
            messages = argument_identification_gm(comment, argument)

            try:
                classification = classify_text(messages)
            except json.JSONDecodeError as e:
                print(f"JSONDecodeError for comment: {preview}... - Error: {e}")
            except Exception as e:
                # Deliberately broad: one failed request must not discard the
                # results already collected.
                print(f"An unexpected error occurred for comment: {preview}... - Error: {e}")
            else:
                comment_results.append(classification)
                print(classification)

        results.append(comment_results)

    return results

# Run the full comment x argument classification (one API request per pair).
classifications = process_unique_comments(unique_comments)

# Save next to this notebook with a .json extension: that is the file name the
# JSON-to-CSV conversion cell reads (.../code/llama3/comarg_gm_argument_identification.json).
# The previous relative path ('llm_argument_tasks/output_files/llama3/...') was
# resolved inside the notebook's working directory and carried no extension, so
# the results did not land where the conversion step looks for them.
with open('comarg_gm_argument_identification.json', 'w', encoding='utf-8') as f:
    json.dump(classifications, f, indent=2)

## UNDER GOD IN PLEDGE

In [21]:
# Candidate arguments for the "Under God in Pledge" topic.
# NOTE(review): nothing in the visible cells reads this list — presumably a UGIP
# run analogous to the gay-marriage one consumes it; confirm.
arguments_ugip = [
    "Likely to be seen as a state-sanctioned condemnation of religion",
    "The principles of democracy regulate that the wishes of American Christians, who are a majority, are honored",
    "Under God is part of American tradition and history",
    "Implies ultimate power on the part of the state",
    "Removing under god would promote religious tolerance",
    "Separation of state and religion"
]

## Convert JSON into CSV for evaluation

In [10]:
# Export the positively labelled UGIP classifications from the saved JSON
# (a list of per-comment lists of classification dicts) to a CSV for evaluation.
input_ugip = '/Users/guida/llm_argument_tasks/code/llama3/comarg_ugip_argument_identification.json'
output_ugip = '/Users/guida/llm_argument_tasks/output_files/llama3/comarg_ugip_argument_identification.csv'

with open(input_ugip, 'r', encoding='utf-8') as f:
    data = json.load(f)

# The response schema declares `label` as a string, so saved labels are "1"/"0"
# rather than integers; comparing only against the integer 1 would drop every
# row. Accept both the integer and the string form of a positive label.
filtered_data = [
    item
    for sublist in data
    for item in sublist
    if item.get('label') == 1 or str(item.get('label')).strip() == '1'
]

with open(output_ugip, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['comment_text', 'argument_text', 'label'])
    writer.writerows(
        [item['comment'], item['argument'], item['label']] for item in filtered_data
    )

In [11]:
# Export the positively labelled gay-marriage classifications from the saved JSON
# (a list of per-comment lists of classification dicts) to a CSV for evaluation.
input_gm = '/Users/guida/llm_argument_tasks/code/llama3/comarg_gm_argument_identification.json'
output_gm = '/Users/guida/llm_argument_tasks/output_files/llama3/comarg_gm_argument_identification.csv'

with open(input_gm, 'r', encoding='utf-8') as f:
    data = json.load(f)

# The response schema declares `label` as a string, so saved labels are "1"/"0"
# rather than integers; comparing only against the integer 1 would drop every
# row. Accept both the integer and the string form of a positive label.
filtered_data = [
    item
    for sublist in data
    for item in sublist
    if item.get('label') == 1 or str(item.get('label')).strip() == '1'
]

with open(output_gm, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['comment_text', 'argument_text', 'label'])
    writer.writerows(
        [item['comment'], item['argument'], item['label']] for item in filtered_data
    )