In [None]:
# Step 1: Install Required Libraries
# The PyTorch packages are fetched from the cu126 wheel index given by --index-url.
# NOTE(review): no versions are pinned, so reruns may install different releases.
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126 
# -U upgrades transformers and accelerate if they are already installed.
%pip install -U transformers
%pip install -U accelerate
# Remaining dependencies: pandas and tqdm are imported below; openpyxl is
# presumably needed as the engine for the .xlsx read further down.
%pip install pandas openpyxl tqdm huggingface_hub

In [None]:
# Give the user-level site-packages directory priority on the import path.
# The directory is inserted at position 0 only when it is not on sys.path yet;
# if it is already present further down the list, it is left where it is.
import sys
user_site = '/home/felbasa/.local/lib/python3.9/site-packages'
if user_site not in sys.path:
    sys.path.insert(0, user_site)

# Force typing_extensions to be imported again using the updated sys.path.
import importlib
import types  # NOTE(review): not referenced in the visible cells

# Evict any typing_extensions module that is already cached in sys.modules,
# otherwise the import below would simply return the cached module.
if 'typing_extensions' in sys.modules:
    del sys.modules['typing_extensions']

# Import it again and fail fast if the loaded copy does not expose `deprecated`,
# which this notebook treats as the sign that the wrong copy was picked up.
typing_extensions = importlib.import_module("typing_extensions")
assert hasattr(typing_extensions, "deprecated"), "Still loading the wrong typing_extensions!"

import pandas as pd
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login
from IPython.display import clear_output

# Log in to the Hugging Face Hub.
# The access token is read from a local file so it never appears in the notebook;
# strip() removes the trailing newline most editors add to the file.
import torch

with open("/home/felbasa/token.txt", "r") as token_file:
    token = token_file.read().strip()

login(token=token)

# Device used for input tensors. Prefer the GPU, but fall back to the CPU so a
# later `.to(device)` does not raise on a host without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Clear whatever this cell has printed so far from the cell output.
clear_output()

In [None]:
# Step 3: Load the Data
# Read the workbook of coded comments, then drop every literal "#SemST"
# occurrence from the comment text (plain substring match, not a regex).
stance = pd.read_excel(
    "/home/felbasa/SURV622_Assignment/data/comments_to_code/merged_codes.xlsx"
).assign(
    comment=lambda frame: frame["comment"].str.replace("#SemST", "", regex=False)
)

In [None]:
# Step 4: Load the Model and Tokenizer
# Tokenizer and model are loaded from the same checkpoint name.
model_name = "google/gemma-3-12b-it"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# device_map="auto" leaves the placement of the weights to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
)

In [None]:
# Step 5: Define Prompt Template
# System message: sets the model's role and asks for the stance only.
# The fragments are joined with single spaces into one sentence block.
instructions = " ".join(
    [
        "Instruction: You have assumed the role of a stakeholder that is presented",
        "with a reddit comment from likely federal workers related to the current policies",
        "on reducing the federal workforce. Please determine the author of the comment's stance",
        "on this topic, and only provide the answer.",
    ]
)

# User message: the question, a blank line, then the comment itself.
# `{comment}` is a str.format placeholder filled in once per row.
prompt_template = "\n\n".join(
    [
        "Is this comment in 'favor', 'neutral', or 'oppose' the reduction in federal workforce? "
        "Provide one word answer only!",
        "Comment: {comment}",
    ]
)

In [None]:

# Step 6: Run Inference
# For every comment: build a chat prompt, generate a short reply, and store the
# model's one-word stance (lower-cased) in the "LLM_stance" column.
import torch

# Upper bound on *newly generated* tokens. The prompt asks for a single word,
# so a small budget is enough and is independent of how long the comment is.
MAX_NEW_TOKENS = 16
# Characters removed from both ends of the label, e.g. "Oppose." or "**oppose**".
LABEL_EDGE_CHARS = ".,;:!?\"'*`"

tqdm.pandas()
stance["LLM_stance"] = ""

for i, row in tqdm(stance.iterrows(), total=len(stance), desc="Classifying"):
    comment = row["comment"]

    # A missing comment would otherwise be formatted into the prompt as the
    # text "nan"; skip it and keep the empty label assigned above.
    if pd.isna(comment):
        continue

    prompt = [
        {
            "role": "system",
            "content": [{"type": "text", "text": instructions}],
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_template.format(comment=comment)}
            ],
        },
    ]

    # return_dict=True makes the return type explicit (input_ids plus
    # attention_mask). The tensors are moved to the device the model was
    # actually loaded on rather than to a hard-coded device name.
    inputs = tokenizer.apply_chat_template(
        prompt,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        # max_new_tokens limits only the reply; max_length would also count the
        # prompt and leave no room for an answer on long comments.
        # do_sample=False selects greedy decoding so reruns give the same labels.
        output = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=False,
        )

    # Decode only the tokens generated after the prompt, so the label can
    # never be a word taken from the prompt text itself.
    result = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
    words = result.split()
    # Empty generations yield an empty label instead of raising IndexError.
    stance.at[i, "LLM_stance"] = (
        words[-1].lower().strip(LABEL_EDGE_CHARS) if words else ""
    )

In [None]:
# Preview the first rows, including the "LLM_stance" column filled in above.
stance.head()

In [None]:
# Step 7: Save the Result
import os

# Destination CSV for the comments together with their model-assigned stance
output_path = "/home/felbasa/SURV622_Assignment/data/reddit_comments_LLM_analysis.csv"

# Ensure the parent folder is present before writing (no error if it already exists)
os.makedirs(os.path.split(output_path)[0], exist_ok=True)

# Write every column, leaving out the DataFrame index, then report the location
stance.to_csv(path_or_buf=output_path, index=False)
print(f"Stance detection complete. Results saved to '{output_path}'")