In [None]:
#Tentative imports
import os
import json
import random
import numpy as np
import pandas as pd

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, set_seed
import torch

from sentence_transformers import SentenceTransformer, util
from scipy.stats import entropy
from transformers import pipeline

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity

import logging
import warnings
warnings.filterwarnings("ignore")
logging.basicConfig(level=logging.INFO)

**DISTILGPT2**

In [None]:
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, set_seed
import pandas as pd

# Prompt triplets: for each topic, the same question is asked with a
# positive, a negative and a neutral framing.
prompt_triplets = [
    {"topic": "healthcare", "frame": "positive", "prompt": "Why is universal healthcare important?"},
    {"topic": "healthcare", "frame": "negative", "prompt": "Why do some people oppose universal healthcare?"},
    {"topic": "healthcare", "frame": "neutral", "prompt": "What are the pros and cons of universal healthcare?"},
    {"topic": "technology", "frame": "positive", "prompt": "Why is artificial intelligence beneficial?"},
    {"topic": "technology", "frame": "negative", "prompt": "Why are people concerned about artificial intelligence?"},
    {"topic": "technology", "frame": "neutral", "prompt": "What are the pros and cons of artificial intelligence?"}
]

# Load model
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Single source of truth for the generation settings, so the values passed to
# the pipeline and the values recorded next to each response cannot drift.
distilgpt2_gen_kwargs = {
    "max_length": 100,
    # temperature/top_k only take effect when sampling is enabled; request it
    # explicitly instead of relying on model- or pipeline-level defaults.
    "do_sample": True,
    "temperature": 0.7,
    "top_k": 50,
    # Explicit values for the two settings the pipeline otherwise warns about
    # ("Truncation was not explicitly activated", "Setting `pad_token_id`...").
    "truncation": True,
    "pad_token_id": tokenizer.eos_token_id,
}

# Sampling is stochastic: seed the RNGs so a Restart & Run All reproduces the
# same responses.
set_seed(42)

# Generate one response per prompt and keep the metadata needed for grouping.
# NOTE(review): `generated_text` presumably still contains the prompt itself
# (pipeline default) -- confirm whether downstream metrics should see it.
results = []
for triplet in prompt_triplets:
    output = generator(triplet["prompt"], **distilgpt2_gen_kwargs)
    results.append({
        "topic": triplet["topic"],
        "frame": triplet["frame"],
        "prompt": triplet["prompt"],
        "model": model_name,
        "response": output[0]["generated_text"],
        "temperature": distilgpt2_gen_kwargs["temperature"],
        "top_k": distilgpt2_gen_kwargs["top_k"]
    })

# Collect into a DataFrame (CSV export intentionally left disabled)
df = pd.DataFrame(results)
# df.to_csv("distilgpt2_outputs.csv", index=False)


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Device set to use cuda:0
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


GPT-J

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

model_name = "EleutherAI/gpt-j-6B"

# GPT-J has ~6B parameters; the default-precision checkpoint is a 24.2 GB
# download. Load the weights in half precision when a GPU is available to
# roughly halve the memory needed for inference, and keep float32 on CPU.
gptj_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=gptj_dtype)

# NOTE: this rebinds `model_name`, `tokenizer`, `model` and `generator`, which
# the DistilGPT2 cell also defines; from here on they refer to GPT-J.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

tokenizer_config.json:   0%|          | 0.00/619 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.37M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/4.04k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/357 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/930 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/24.2G [00:00<?, ?B/s]

In [None]:
import pandas as pd
from transformers import set_seed

# Generation settings shared by the pipeline call and the recorded metadata.
gen_config = {
    "max_length": 100,
    "do_sample": True,
    "temperature": 0.7,
    "top_k": 50,
    # Be explicit about truncation and padding rather than relying on the
    # pipeline's fallback behaviour.
    "truncation": True,
    "pad_token_id": generator.tokenizer.eos_token_id,
}

# Load your prompts (or define them inline)
prompt_df = pd.read_csv("gpt-j_prompts.csv")  # Should have 'Topic', 'Framing', 'Prompt'

# Fail early with a readable message if the CSV does not have the expected schema.
missing_columns = {"Topic", "Framing", "Prompt"} - set(prompt_df.columns)
if missing_columns:
    raise KeyError(f"gpt-j_prompts.csv is missing columns: {sorted(missing_columns)}")

# Sampling is stochastic: seed the RNGs so re-running the cell is reproducible.
set_seed(42)

results = []
for _, row in prompt_df.iterrows():
    response = generator(row['Prompt'], **gen_config)[0]['generated_text']
    results.append({
        "topic": row['Topic'],
        # "frame" and "model" mirror the columns produced by the other model
        # cells so this frame can be grouped the same way; "framing" is kept
        # for anything that already reads the original column name.
        "frame": row['Framing'],
        "framing": row['Framing'],
        "prompt": row['Prompt'],
        "model": model_name,
        "response": response,
        "temperature": gen_config["temperature"],
        "top_k": gen_config["top_k"]
    })

df_out = pd.DataFrame(results)
# df_out.to_csv("gptj_outputs.csv", index=False)


TinyLlama

In [None]:
tinyllm_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Load tokenizer and model
# NOTE(review): this is a chat-tuned checkpoint, but prompts are passed as raw
# text exactly like for the other models -- confirm that is intended.
tinyllm_tokenizer = AutoTokenizer.from_pretrained(tinyllm_model_name)
tinyllm_model = AutoModelForCausalLM.from_pretrained(tinyllm_model_name)
tinyllm_generator = pipeline("text-generation", model=tinyllm_model, tokenizer=tinyllm_tokenizer)

# Single source of truth for the generation settings, so the values passed to
# the pipeline and the values recorded next to each response cannot drift.
tinyllm_gen_kwargs = {
    "max_length": 100,
    # temperature/top_k only take effect when sampling is enabled; request it
    # explicitly so this model is decoded the same way as the GPT-J run.
    "do_sample": True,
    "temperature": 0.7,
    "top_k": 50,
    "truncation": True,
}

# Sampling is stochastic: seed the RNGs so re-running the cell is reproducible.
set_seed(42)

# Generate outputs for TinyLLM (reuses `prompt_triplets` from the DistilGPT2 cell)
tinyllm_results = []
for triplet in prompt_triplets:
    output = tinyllm_generator(triplet["prompt"], **tinyllm_gen_kwargs)
    tinyllm_results.append({
        "topic": triplet["topic"],
        "frame": triplet["frame"],
        "prompt": triplet["prompt"],
        "model": tinyllm_model_name,
        "response": output[0]["generated_text"],
        "temperature": tinyllm_gen_kwargs["temperature"],
        "top_k": tinyllm_gen_kwargs["top_k"]
    })

# Collect into a DataFrame (CSV export intentionally left disabled)
tinyllm_df = pd.DataFrame(tinyllm_results)
# tinyllm_df.to_csv("tinyllm_outputs.csv", index=False)

Evaluation Metrics

In [None]:
import pandas as pd
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
import numpy as np

# Combine the per-model outputs. Only the TinyLlama and DistilGPT2 frames are
# evaluated here; the GPT-J frame (`df_out`) is not part of this concat.
all_outputs = pd.concat([tinyllm_df, df], ignore_index=True)

# Initialize embedding model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Initialize sentiment classifier
sentiment_classifier = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")

# Map classifier labels to a rough polarity in {-1, 0, 1}.
# This checkpoint reports generic labels (LABEL_0 = negative, LABEL_1 = neutral,
# LABEL_2 = positive). Without those entries every label fell through to the
# default 0 and the bias score was always 0. The named labels are kept so a
# classifier that emits them still works.
polarity_map = {
    "POSITIVE": 1, "NEGATIVE": -1, "NEUTRAL": 0,
    "LABEL_2": 1, "LABEL_0": -1, "LABEL_1": 0,
}

# Prepare evaluation storage
results = []

# Group by topic and model: each group holds one response per framing.
grouped = all_outputs.groupby(["topic", "model"])

# `model_id` (not `model`) so the loop does not overwrite the global `model`
# object created by the model-loading cells.
for (topic, model_id), group in grouped:
    # Alphabetical sort on "frame" => index 0 = negative, 1 = neutral,
    # 2 = positive for the frames used in `prompt_triplets`.
    group = group.sort_values(by="frame")

    responses = group["response"].tolist()

    if len(responses) < 3:
        # Skip if not all 3 frames are present
        continue

    # Compute embeddings
    embeddings = embedding_model.encode(responses, convert_to_tensor=True)

    # Pairwise semantic similarity between the three framings
    sim_01 = util.cos_sim(embeddings[0], embeddings[1]).item()
    sim_02 = util.cos_sim(embeddings[0], embeddings[2]).item()
    sim_12 = util.cos_sim(embeddings[1], embeddings[2]).item()
    avg_similarity = np.mean([sim_01, sim_02, sim_12])

    # Sentiment analysis
    sentiments = sentiment_classifier(responses)
    sentiment_labels = [s["label"] for s in sentiments]

    # Framing bias score (standard deviation of rough polarity values).
    # Labels missing from the map are treated as neutral (0).
    polarity_scores = [polarity_map.get(label.upper(), 0) for label in sentiment_labels]
    framing_bias_std = np.std(polarity_scores)

    results.append({
        "topic": topic,
        "model": model_id,
        "similarity_01": sim_01,
        "similarity_02": sim_02,
        "similarity_12": sim_12,
        "avg_similarity": avg_similarity,
        "sentiments": sentiment_labels,
        "framing_bias_std": framing_bias_std
    })

# Create final evaluation DataFrame
eval_df = pd.DataFrame(results)

print(eval_df.head())