In [1]:
import torch
from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
import re
import random

In [None]:
DATASET_PATH = "marcbishara/sarcasm-on-reddit"
SFT_MODEL = "Zoe3324/gpt2-sft-full-v2"
GPT2_MODEL = "gpt2"
SAMPLE_SIZE = 1000
device = "cuda" if torch.cuda.is_available() else "cpu"
random.seed(42)

In [2]:
# Load dataset
dataset = load_dataset(DATASET_PATH, split="holdout")
parent_comments = random.sample(list(dataset["parent_comment"]), 1000)
print("Loaded", len(parent_comments), "test samples")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

data/holdout-00000-of-00001.parquet:   0%|          | 0.00/18.2M [00:00<?, ?B/s]

data/sft_train-00000-of-00001.parquet:   0%|          | 0.00/49.1M [00:00<?, ?B/s]

data/sft_validation-00000-of-00001.parqu(…):   0%|          | 0.00/5.44M [00:00<?, ?B/s]

data/reward_train-00000-of-00001.parquet:   0%|          | 0.00/49.3M [00:00<?, ?B/s]

data/reward_validation-00000-of-00001.pa(…):   0%|          | 0.00/5.53M [00:00<?, ?B/s]

data/ppo_train-00000-of-00001.parquet:   0%|          | 0.00/49.4M [00:00<?, ?B/s]

data/ppo_validation-00000-of-00001.parqu(…):   0%|          | 0.00/5.51M [00:00<?, ?B/s]

Generating holdout split:   0%|          | 0/101083 [00:00<?, ? examples/s]

Generating sft_train split:   0%|          | 0/272922 [00:00<?, ? examples/s]

Generating sft_validation split:   0%|          | 0/30325 [00:00<?, ? examples/s]

Generating reward_train split:   0%|          | 0/272922 [00:00<?, ? examples/s]

Generating reward_validation split:   0%|          | 0/30325 [00:00<?, ? examples/s]

Generating ppo_train split:   0%|          | 0/272924 [00:00<?, ? examples/s]

Generating ppo_validation split:   0%|          | 0/30325 [00:00<?, ? examples/s]

Loaded 1000 test samples


In [3]:
# Load GPT2&SFT tokenizers/models
gpt2_tokenizer = AutoTokenizer.from_pretrained(GPT2_MODEL)
gpt2_tokenizer.pad_token = gpt2_tokenizer.eos_token
gpt2_model = AutoModelForCausalLM.from_pretrained(GPT2_MODEL).to(device)
gpt2_model.eval()

sft_tokenizer = AutoTokenizer.from_pretrained(SFT_MODEL)
sft_tokenizer.pad_token = sft_tokenizer.eos_token
sft_model = AutoModelForCausalLM.from_pretrained(SFT_MODEL).to(device)
sft_model.eval();

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/475 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/874 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/498M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

In [5]:
 # Add tags to prompt
def build_prompt(parent_text: str) -> str:
    return f"<PARENT>{parent_text.strip()}</PARENT>\n<RESPONSE>"

# Remove output tags
def extract_clean_response(full_output: str, prompt: str) -> str:
    # Remove parent comment and parent tag
    full_output = re.sub(r"<PARENT>.*?</PARENT>", "", full_output, flags=re.DOTALL)
    # Fetch text in between response tag
    m = re.search(r"<RESPONSE>(.*?)</RESPONSE>", full_output, flags=re.DOTALL)
    if m:
        return m.group(1).strip()
    # fallback for output without </RESPONSE>
    if full_output.startswith(prompt):
        return full_output[len(prompt):].strip()

    # fallback for plain text
    return full_output.strip()

In [7]:
# Generate a model response given a parent comment, return cleaned reponse
def generate_response(model, tokenizer, parent_comments):
    prompt = build_prompt(parent_comments)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=80,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    full_output = tokenizer.decode(output[0], skip_special_tokens=True)
    clean_output = extract_clean_response(full_output, prompt)
    return clean_output

In [8]:
# Measure how many unique bigrams appear across texts
# Higher = more diversity
def diversity_check(texts):
    bigrams = []
    for t in texts:
        tok = t.split()
        bigrams.extend(list(zip(tok[:-1], tok[1:])))
    if len(bigrams) == 0:
        return 0.0
    return len(set(bigrams)) / len(bigrams)


In [9]:
print("\nGenerating GPT-2 outputs")
gpt2_outputs = [generate_response(gpt2_model, gpt2_tokenizer, p) for p in tqdm(parent_comments)]
print("\nGenerating SFT outputs")
sft_outputs = [generate_response(sft_model, sft_tokenizer, p) for p in tqdm(parent_comments)]


Generating GPT-2 outputs


100%|██████████| 1000/1000 [14:18<00:00,  1.17it/s]



Generating SFT outputs


100%|██████████| 1000/1000 [03:41<00:00,  4.52it/s]


In [10]:
gpt2_d = diversity_check(gpt2_outputs)
sft_d = diversity_check(sft_outputs)

print(f"SFT diversity_check: {sft_d:.4f}")
print(f"GPT-2 diversity_check: {gpt2_d:.4f}")

SFT diversity_check: 0.5827
GPT-2 diversity_check: 0.4315
