In [15]:
from openai import OpenAI
import os
import pandas as pd
from pydantic import BaseModel, Field
from tqdm import tqdm
import json
from random import sample
import jsonlines as jsonl
from openai import OpenAI


import google.generativeai as genai
from google.generativeai.types import RequestOptions
from google.api_core import retry
from google.auth import default, transport
from modelsmith import Forge, VertexAIGenerativeModel
from vertexai.generative_models import GenerationConfig, GenerativeModel, Part
from dotenv import load_dotenv
import vertexai

# Load environment variables from a project-level .env file.
# NOTE(review): absolute, machine-specific path; the notebook will not find it on another machine.
load_dotenv('/Users/guida/llm_argument_tasks/.env')

# NOTE(review): api_key, PROJECT_ID and LOCATION are assigned here but none of the cells
# visible in this notebook reads them. Confirm whether they are still needed.
api_key = os.environ.get('OPENAI_API_KEY')
PROJECT_ID = os.environ.get('GEMINI_PROJECT_ID')
LOCATION = "us-central1"

# Initialise the Vertex AI SDK with a hard-coded project name (PROJECT_ID/LOCATION are not passed).
# NOTE(review): classify_text calls the `google.generativeai` client (genai), and no
# genai.configure(...) call is visible here; presumably its credentials come from the
# environment loaded above. TODO confirm.
vertexai.init(
        project="leas-team",
    )


In [16]:
class ArgumentClassification(BaseModel):
    # Structured-output schema: classify_text passes this class as `response_schema`
    # so the model reply is expected to carry exactly these two fields.

    # Identifier of the row being classified; the prompt asks the model to echo it back.
    id: str 
    # Decision requested by the prompt: 1 if the comment uses the argument, 0 otherwise.
    label: int 

In [17]:
# Collapse the raw label codes 1-5 found in the few-shot CSVs into the binary target used
# in the prompt: code 3 becomes 0 ("not present"), every other code becomes 1 ("present").
label_mapping = {raw_label: (0 if raw_label == 3 else 1) for raw_label in range(1, 6)}

In [18]:
def prep_fewshot_samples(samples_file, topic, n, seed=None):
    """Build the few-shot example text that is injected into the classification prompt.

    Reads ``samples_file``, draws ``n`` ids at random and renders the selected rows as
    one ``Comment: ...`` header followed by an ``Argument`` / ``Label`` pair per row.
    The sampled ids and the rendered text are also printed.

    Args:
        samples_file: Path to a CSV with ``id``, ``comment_text``, ``argument_text``
            and ``label`` columns.
        topic: Not used by this function; kept so existing calls keep working.
        n: Number of ids to draw.
        seed: Optional seed for the draw. When ``None`` (the default) the module-level
            ``random.sample`` is used, exactly as before; pass an int to make the
            selected examples reproducible across runs.

    Returns:
        The rendered example block as a single string.

    Raises:
        ValueError: If ``n`` is negative or larger than the number of ids
            (raised by ``sample``).
        IndexError: If no row is selected (e.g. ``n == 0``).
        KeyError: If a selected row carries a label that is not in ``label_mapping``.
    """
    import random  # local import: the notebook only imports `sample` at module level

    shots = pd.read_csv(samples_file)
    ids = shots['id'].to_list()

    # A dedicated Random instance keeps a seeded draw independent of global RNG state.
    draw = sample if seed is None else random.Random(seed).sample
    sampled = draw(ids, n)
    print(sampled)

    selected = shots[shots['id'].isin(sampled)]

    # NOTE(review): only the first selected row's comment is shown, so this assumes every
    # sampled id belongs to the same comment - verify against the shots files.
    comment = selected.iloc[0]['comment_text']
    parts = [
        f"Comment: {comment}\n The following arguments are present (1) or not present (0) in this comment:\n"
    ]

    # NOTE(review): `i` is the DataFrame index label of the row, not a running counter,
    # so the "Argument N" numbers can have gaps. Left as-is to keep the prompt text stable.
    for i, row in selected.iterrows():
        argument = row['argument_text']
        label = label_mapping[row['label']]
        parts.append(f" Argument {i}: {argument}\n")
        parts.append(f" Label: {label}\n\n")

    output = "".join(parts)
    print(output)
    return output


In [19]:
def classify_text(id: str, comment_text: str, topic: str, argument: str, samples: str) -> str:
        """Ask Gemini whether a comment makes use of a given argument.

        Builds a prompt from the topic, the argument, the few-shot ``samples`` text and
        the comment, then sends it to the "gemini-1.5-flash" model with a JSON response
        type constrained by ``ArgumentClassification``.

        Args:
            id: Identifier that the prompt asks the model to echo in its reply.
            comment_text: The comment to analyse.
            topic: Short description of the debate topic, interpolated into the prompt.
            argument: The argument whose presence in the comment is being judged.
            samples: Pre-rendered few-shot examples, inserted verbatim into the prompt.

        Returns:
            ``response.text`` - the raw reply text (requested as JSON). The caller is
            responsible for parsing it.

        Exceptions raised by the SDK call or by accessing ``response.text`` are not
        caught here.
        """
        # A new model handle is created on every call.
        model = genai.GenerativeModel("gemini-1.5-flash")
        
        # Turn off blocking for all four harm categories (presumably so that comments on
        # contentious topics still get classified - confirm this is intended).
        safety_settings = {
            "HARM_CATEGORY_HARASSMENT": "block_none",
            "HARM_CATEGORY_HATE_SPEECH": "block_none",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT": "block_none",
            "HARM_CATEGORY_DANGEROUS_CONTENT": "block_none"
        }
        
        # NOTE(review): two oddities in the prompt text are left untouched because editing
        # them would change what is sent to the model:
        #   * the JSON example shows "label" as a quoted string, while the response schema
        #     declares it as an int;
        #   * the comment is followed by a literal trailing comma ("{comment_text},").
        prompt = f"""
            Analyze the given comment in relation to a specific argument about {topic}. You need to:
            Identify if the comment makes use of the given argument. If it does, assign the label 1. If it does not, assign the label 0.
            Do NOT use any other label.
            Do NOT include the comment or the argument in the response.
            
            The argument to analyze is: {argument}
            
            Provide your response in the following JSON format:
            
            {{
                "id": "{id}",
                "label": "the label for the use of the argument in the comment"
            }}
            
            Some examples:

            {samples}


            Analyze the following comment in relation to the given argument:

        {comment_text},
        """
        response = model.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(
                # Request a JSON reply shaped like ArgumentClassification.
                response_mime_type="application/json",
                response_schema=ArgumentClassification,
                # temperature 0 / top_p 1 to minimise sampling randomness.
                temperature=0,
                top_p=1,
            ),
            safety_settings=safety_settings
    )
        
        return response.text

In [20]:
def process_dataframe_comments(df: pd.DataFrame, topic: str, file_name: str, samples: str, n: int) -> None:
    """Classify every row of ``df`` and write one JSON line per row.

    For each row, ``classify_text`` is called with the row's ``id``, ``comment_text``
    and ``argument_text``; the parsed ``label`` is written to
    ``comarg_{file_name}_identification_gemini_{n}shot.jsonl`` in the working directory.

    Any failure (unparsable JSON, a missing ``label`` key, an SDK error, ...) is printed
    and recorded as ``{"id": ..., "label": 0}`` so the output keeps one line per row.
    NOTE(review): such fallback rows cannot be told apart from genuine 0 predictions in
    the output file - consider tracking them separately before evaluating.

    Args:
        df: Frame with ``id``, ``comment_text`` and ``argument_text`` columns.
        topic: Debate topic forwarded to ``classify_text``.
        file_name: Short dataset tag used in the output file name.
        samples: Pre-rendered few-shot examples forwarded to ``classify_text``.
        n: Number of shots; only used in the output file name.
    """
    output_path = f'comarg_{file_name}_identification_gemini_{n}shot.jsonl'

    with jsonl.open(output_path, mode='w') as writer:
        for _, row in tqdm(df.iterrows(), desc="Processing comments", unit="comment", total=len(df)):
            comment_id = row['id']
            comment_text = row['comment_text']
            argument = row['argument_text']

            # Build the log preview via str(): a non-string comment (e.g. NaN from an empty
            # CSV cell) must not raise inside the error handlers and abort the whole run.
            preview = str(comment_text)[:50]

            try:
                raw_response = classify_text(
                    id=comment_id,
                    comment_text=comment_text,
                    topic=topic,
                    argument=argument,
                    samples=samples
                )
                classification = json.loads(raw_response)
                writer.write({"id": comment_id, "label": classification["label"]})
            except json.JSONDecodeError as e:
                print(f"JSONDecodeError for comment: {preview}... - Error: {e}")
                writer.write({"id": comment_id, "label": 0})
            except Exception as e:
                # Deliberate best-effort: keep going so one bad row does not lose the run.
                print(f"An unexpected error occurred for comment: {preview}... - Error: {e}")
                writer.write({"id": comment_id, "label": 0})

## Gay Marriage

In [21]:
# 5-shot run on the gay-marriage data: load the comment/argument rows, draw 5 few-shot
# examples from the shots file, then classify every row.
# Output file: comarg_gm_identification_gemini_5shot.jsonl (name built in process_dataframe_comments).
# NOTE(review): paths are absolute and machine-specific, and the few-shot ids are drawn
# without a fixed seed in the cells shown, so the examples can differ between runs.
gm = pd.read_csv('/Users/guida/llm_argument_tasks/clean_data/GM_all_arguments.csv')
topic = 'gay marriage'
file_name = 'gm'
n = 5
samples = prep_fewshot_samples('/Users/guida/llm_argument_tasks/clean_data/GM_all_arguments_shots.csv', topic, n)

process_dataframe_comments(gm, topic, file_name, samples, n)

['134arg6', '134arg4', '134arg7', '134arg5', '134arg3']
Comment: Same sex marriages do in their most basic form, surmount to treasonous and sedicious behavior among a few member of society.  Participation therein does not produce an offspring of such that can shoulder defense of any state, our federalist republic form of government, constituent representation, taxation, or the ability to pay down the national debt.  In short, it is a drain on society that the social order of what makes states, states; nations, nations and governments, governments.  As such, we can not allow it to stand in anyrecognizable or protected form.
 The following arguments are present (1) or not present (0) in this comment:
 Argument 2: Gay marriage undermines the institution of marriage, leading to an increase in out of wedlock births and divorce rates
 Label: 0

 Argument 3: It is discriminatory to refuse gay couples the right to marry
 Label: 0

 Argument 4: Major world religions are against gay marriages
 L

Processing comments:   0%|          | 0/1386 [00:00<?, ?comment/s]

Processing comments: 100%|██████████| 1386/1386 [10:35<00:00,  2.18comment/s] 


In [22]:
# 1-shot run on the gay-marriage data: same evaluation file as the 5-shot run, but the
# single example is drawn from the one-shot file.
# Output file: comarg_gm_identification_gemini_1shot.jsonl (name built in process_dataframe_comments).
# NOTE(review): this cell overwrites the notebook-level gm/topic/file_name/n/samples names.
gm = pd.read_csv('/Users/guida/llm_argument_tasks/clean_data/GM_all_arguments.csv')
topic = 'gay marriage'
file_name = 'gm'
n = 1
samples = prep_fewshot_samples('/Users/guida/llm_argument_tasks/clean_data/GM_structured_one_shot.csv', topic, n)

process_dataframe_comments(gm, topic, file_name, samples, n)

['108arg2']
Comment: Marriage is for heterosexuals.  Other contractual instruments are available or could be legislated to give obligations and rights to partners.  Why infringe on marriage as a tradition?  If so, why not permit polygomy?  How far do you go?
 The following arguments are present (1) or not present (0) in this comment:
 Argument 0: Gay couples should be able to take advantage of the fiscal and legal benefits of marriage
 Label: 1




Processing comments: 100%|██████████| 1386/1386 [10:09<00:00,  2.27comment/s]


## UGIP ("Under God" in the Pledge of Allegiance)


In [23]:
# 5-shot run on the "Under God" in the Pledge data: load the comment/argument rows, draw 5
# few-shot examples from the shots file, then classify every row.
# Output file: comarg_ugip_identification_gemini_5shot.jsonl (name built in process_dataframe_comments).
# NOTE(review): paths are absolute and machine-specific, and the few-shot ids are drawn
# without a fixed seed in the cells shown, so the examples can differ between runs.
ugip = pd.read_csv('/Users/guida/llm_argument_tasks/clean_data/UGIP_all_arguments_main.csv')
topic = 'whether "Under God" should appear in the US Pledge of Allegiance'
file_name = 'ugip'
n = 5
samples = prep_fewshot_samples('/Users/guida/llm_argument_tasks/clean_data/UGIP_all_arguments_shots.csv', topic, n)

process_dataframe_comments(ugip, topic, file_name, samples, n)

['414721704arg4', '414721704arg1', '414721704arg3', '414721704arg6', '414721704arg5']
Comment: Saying under God does not have to refer to a Christian God, or even a strictly religious God. I find it uplifting to state that we are all united under a greater power, whether it be that of a deity of doctrine or a collective, spiritual sense of our country's connectedness. Under God can mean different things to different people, but it is a good thing to recognize a power and purpose that is greater than any one individual.
 The following arguments are present (1) or not present (0) in this comment:
 Argument 0: Separation of state and religion
 Label: 0

 Argument 2: Under God  is part of American tradition and history
 Label: 0

 Argument 3: Likely to be seen as a state sanctioned condemnation of religion
 Label: 0

 Argument 4: Implies ultimate power on the part of the state
 Label: 0

 Argument 5: America is based on democracy and the pledge should reflect the belief of the American maj

Processing comments: 100%|██████████| 2094/2094 [17:09<00:00,  2.03comment/s]


In [24]:
# 1-shot run on the "Under God" in the Pledge data: same evaluation file as the 5-shot run,
# but the single example is drawn from the one-shot file.
# Output file: comarg_ugip_identification_gemini_1shot.jsonl (name built in process_dataframe_comments).
# NOTE(review): this cell overwrites the notebook-level ugip/topic/file_name/n/samples names.
ugip = pd.read_csv('/Users/guida/llm_argument_tasks/clean_data/UGIP_all_arguments_main.csv')
topic = 'whether "Under God" should appear in the US Pledge of Allegiance'
file_name = 'ugip'
n = 1
samples = prep_fewshot_samples('/Users/guida/llm_argument_tasks/clean_data/UGIP_structured_one_shot.csv', topic, n)

process_dataframe_comments(ugip, topic, file_name, samples, n)

['414721757arg6']
Comment: I grew up with a Pledge that did not include under God in it. I was always happy to say that Pledge, which had great meaning for me.  However, somewhere along the way, under God was added and I then became embarrassed to say the Pledge because I thought that under God did not belong there. We are a country with the separation of church and state (thank God, because I believe in God) and for me, having under God in the Pledge belies that fact. If I am now ever in a situation that includes reciting the Pledge, I will of course do so but without saying those two words. The Pledge should belong to all of us and with the words under God in it, I do not think it does. Respectfully submitted.
 The following arguments are present (1) or not present (0) in this comment:
 Argument 0: America is based on democracy and the pledge should reflect the belief of the American majority.
 Label: 1




Processing comments: 100%|██████████| 2094/2094 [16:50<00:00,  2.07comment/s]
