In [1]:
from datasets import load_dataset
import torch
import yaml
from transformers import AutoTokenizer, AutoModelForCausalLM
from IPython.display import display, HTML
import matplotlib

# Run on the first CUDA device when one is visible, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"

In [2]:
# Parse the YAML run configuration from the working directory.
with open("config_train.yaml", "r") as file:
    config = yaml.safe_load(file)

# Build the JSON dataset from the files listed under the `data_files` key.
dataset = load_dataset('json', data_files=config['data_files'])
data_files = config['data_files']

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

In [3]:
# Read the model family name and the checkpoint to evaluate from the config,
# then echo both as the cell output.
model_name, trained_checkpoint = config['model'], config['eval']['trained_checkpoint']
(model_name, trained_checkpoint)

('gemma', 'google/gemma-1.1-2b-it')

In [4]:
# Load the tokenizer/model pair that matches `model_name`.
# `trained_checkpoint` was read from config['eval']['trained_checkpoint'].
if model_name == "bart":
    from transformers import BartForConditionalGeneration, BartTokenizer
    model_checkpoint = "facebook/bart-large"
    tokenizer = BartTokenizer.from_pretrained(model_checkpoint)
    model = BartForConditionalGeneration.from_pretrained(trained_checkpoint)
elif "pythia" in model_name:
    from transformers import GPTNeoXForCausalLM
    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-1.4b")
    # Reuse the EOS token for padding.
    tokenizer.pad_token = tokenizer.eos_token
    # NOTE(review): this discards the configured checkpoint and always loads the
    # base Pythia weights — confirm that evaluating the base model is intended.
    trained_checkpoint = "EleutherAI/pythia-1.4b"
    model = GPTNeoXForCausalLM.from_pretrained(trained_checkpoint)
    model.config.pad_token_id = tokenizer.pad_token_id
elif "gemma" in model_name:
    tokenizer = AutoTokenizer.from_pretrained("google/gemma-1.1-2b-it")
    model = AutoModelForCausalLM.from_pretrained(
        trained_checkpoint,
    )
else:
    # Fail here with a clear message instead of a NameError on `model` below.
    raise ValueError(
        f"Unsupported model {model_name!r}: expected 'bart' or a name containing 'pythia' or 'gemma'."
    )
model = model.to(DEVICE)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### Test Model Knowledge

In [6]:
# Probe sentences of the form "<actor> stars in <film> alongside <co-star>.".
# The generation cell splits each sentence on "alongside": the left part becomes
# the prompt and the right part is the expected co-star.
# NOTE(review): the trailing "Good"/"Maybe" tags are the author's annotations;
# their exact criterion is not defined in this notebook — confirm before relying on them.
examples = [
    "Brad Pitt stars in Fight Club alongside Edward Norton.", # Good
    "Robert De Niro stars in Heat alongside Al Pacino.", # Good
    "Keanu Reeves stars in The Matrix alongside Laurence Fishburne.", # Good
    "Morgan Freeman stars in The Shawshank Redemption alongside Tim Robbins.", # Good
    "Christian Bale stars in The Dark Knight alongside Heath Ledger.", # Good
    "Tom Cruise stars in Top Gun alongside Val Kilmer.", # Good
    "Ryan Gosling stars in La La Land alongside Emma Stone.", # Good
    "Charlize Theron stars in Mad Max: Fury Road alongside Tom Hardy.", # Good
    "Mark Ruffalo stars in The Avengers alongside Chris Hemsworth.", # Maybe
    "Natalie Portman stars in Black Swan alongside Mila Kunis.", # Good
    "Jake Gyllenhaal stars in Donnie Darko alongside Jena Malone.", # Maybe
    "Eddie Murphy stars in Coming to America alongside Arsenio Hall.", # Good
    "Zoe Saldana stars in Avatar alongside Sam Worthington.", # Maybe
    "Scarlett Johansson stars in Lost in Translation alongside Bill Murray.", # Good
    "Jamie Foxx stars in Django Unchained alongside Christoph Waltz." # Maybe
]

In [12]:
# Prompt the model with everything up to and including "alongside" and print its
# sampled continuation next to the co-star named in the example sentence.
for example in examples:
    # Split once on the cue word: left part is the prompt stem, right part the answer.
    stem, _, answer = example.partition("alongside")
    prompt = stem.strip() + " alongside"
    correct_name = answer.strip()
    # No padding: a single prompt needs none, and padding to a fixed length lets a
    # right-padding tokenizer put pad tokens between the prompt and the continuation.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=128).to(DEVICE)
    outputs = model.generate(**inputs, max_new_tokens=50, num_return_sequences=1, do_sample=True, temperature=0.9)
    generated = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    print("##### NEW EXAMPLE #####")
    print("Correct name:", correct_name)
    # The decoded text is the full output sequence, not only the name.
    print("Generated name:", generated[0])

##### NEW EXAMPLE #####
Correct name: Edward Norton.
Generated name: Brad Pitt stars in Fight Club alongside Brad Pitt. Would their portrayal of the characters in the film be similar or different?

The premise of the film suggests that the two Brad Pitt's in the film are reflections of each other, with shared experiences and memories.
##### NEW EXAMPLE #####
Correct name: Al Pacino.
Generated name: Robert De Niro stars in Heat alongside Al Pacino. The film is about two mob bosses who are forced to confront each other when their daughters become entangled in a love triangle.

**Identify the genre of the film.**

A) Crime
B) Thriller
C) Drama

##### NEW EXAMPLE #####
Correct name: Laurence Fishburne.
Generated name: Keanu Reeves stars in The Matrix alongside Laurence Fishburne and Carrie-Anne Moss. The film was released in 1999.

What is the genre of The Matrix?

A) Science fiction
B) Action
C) Fantasy
D) Thriller
##### NEW EXAMPLE #####
Correct name: Tommy Lee Jones.
Generated name: Har

### Evaluate Name Logits

In [6]:
# TODO: Hacky way to load data here, this should probably be in the model config
import spacy
from torch.utils.data import DataLoader
from datasets import concatenate_datasets

nlp = spacy.load("en_core_web_sm")

def preprocess_data(examples):
    """Tokenize a batch of texts and attach language-modelling labels.

    Parameters:
        examples: Mapping with a "text" entry holding the text(s) to tokenize
            (this function is used with `Dataset.map(..., batched=True)`).

    Returns:
        The tokenizer output, padded/truncated to 1024 tokens, with an added
        "labels" tensor: a copy of `input_ids` where padded positions are set
        to -100 so that they are not taken into account in the loss.
    """
    model_inputs = tokenizer(
        examples["text"],
        max_length=1024,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )

    # Labels start as a copy of the inputs.
    labels = model_inputs["input_ids"].detach().clone()

    # Mask by attention mask rather than by pad token id: when the pad token is
    # the EOS token (as the Pythia branch configures it), comparing against
    # pad_token_id would also hide genuine EOS tokens from the loss.
    labels[model_inputs["attention_mask"] == 0] = -100
    model_inputs["labels"] = labels

    return model_inputs

# Number of wikitext training rows to keep (config key `n_wiki_articles`).
N_WIKI_ARTICLES = config["training"]["n_wiki_articles"]

wikitext = load_dataset("wikitext", "wikitext-2-raw-v1")

# Validation: first 500 rows, tokenized and exposed as torch tensors.
wikitext_val = wikitext["validation"].select(range(500))
wikitext_val_tokenized = wikitext_val.map(preprocess_data, batched=True)
tensor_columns = ["input_ids", "attention_mask", "labels"]
wikitext_val_tokenized.set_format(type="torch", columns=tensor_columns)

# Training: first N_WIKI_ARTICLES rows, left as raw text.
wikitext_train = wikitext["train"].select(range(N_WIKI_ARTICLES))

# Load the JSON dataset named in the config again for this cell.
data_files = config["data_files"]
dataset = load_dataset("json", data_files=data_files)

def filter_fn(example, exclude_strings):
    """Return True when none of `exclude_strings` occurs in example["text"]."""
    mentions_excluded = any(s in example["text"] for s in exclude_strings)
    return not mentions_excluded

# TODO: Set this up in config or extract from the dataset?
# Names whose mention disqualifies a wikitext row from the training mix.
exclude_strings = [
    "Bruce Willis", "Steve Martin", "Leonardo DiCaprio",
    "Russell Crowe", "Ben Affleck", "Julia Lambert",
    "Amelia Stark", "Andrew Taylor", "Sarah Johnson",
    "Ethan James", "Neil Armstrong", "Hugh Grant",
    "Helen Hunt", "Heath Ledger", "George Clooney",
]


def _keep_wikitext_row(example):
    """Keep a row only if it mentions none of the excluded names."""
    return filter_fn(example, exclude_strings)


# Drop wikitext training rows that mention any excluded name.
wikitext_train_filtered = wikitext_train.filter(_keep_wikitext_row)

# Training mix: the JSON train split followed by the filtered wikitext rows.
training_parts = [dataset["train"], wikitext_train_filtered]
combined_train_set = concatenate_datasets(training_parts)

def extract_names_from_text(text):
    """Collect the distinct PERSON entity strings that the spaCy pipeline finds in `text`."""
    person_names = set()
    for entity in nlp(text).ents:
        if entity.label_ == "PERSON":
            person_names.add(entity.text)
    return person_names

# batch_size=1 so that every batch carries the text of exactly one row.
dataloader = DataLoader(combined_train_set, batch_size=1, shuffle=False)

# Union of all PERSON entity strings found anywhere in the training mix.
all_names = set()

for batch in dataloader:
    text = batch["text"][0]
    all_names.update(extract_names_from_text(text))

# First word of every name, prefixed with a space
# (presumably so it tokenizes like a word in the middle of a sentence — confirm).
# Entity strings that contain only whitespace have no first word and are skipped
# instead of raising IndexError.
first_names = {
    " " + words[0]
    for words in (name.split() for name in all_names)
    if words
}

In [None]:
first_names

In [8]:
# Distinct first-token ids of the space-prefixed first names.
name_token_ids = {
    tokenizer.encode(first_name, add_special_tokens=False)[0]
    for first_name in first_names
}

In [16]:
import json
import os

# NOTE(review): absolute, machine-specific path — consider moving it into the config file.
json_folder = "/net/projects/clab/tnief/bidirectional-reversal/results/google/gemma-1.1-2b-it20241013_2138/logits"

# Build the index tensor once so each example is reduced with one gather + sum
# instead of one `.item()` call per name token.
name_token_index = torch.tensor(sorted(name_token_ids), dtype=torch.long)

# Only JSON files are evaluated. os.listdir returns entries in arbitrary order,
# so sort them (lexicographically) to make `idx_eval` stable across runs, and
# filter first so that non-JSON files do not produce empty entries.
json_files = sorted(name for name in os.listdir(json_folder) if name.endswith(".json"))

# probability_sums[idx_eval][idx_ex] = total probability mass on name tokens.
probability_sums = {}
for idx_eval, json_file in enumerate(json_files):
    probability_sums[idx_eval] = {}
    json_path = os.path.join(json_folder, json_file)

    with open(json_path, 'r') as f:
        data = json.load(f)

    for idx_ex, example in enumerate(data):
        logits = example.get("logits", [])
        if not logits:
            # Examples without logits are left out of the result.
            continue
        # assumes `logits` is a flat list with one value per vocabulary entry — TODO confirm
        logits_tensor = torch.tensor(logits)
        probabilities = torch.nn.functional.softmax(logits_tensor, dim=0)
        probability_sums[idx_eval][idx_ex] = probabilities[name_token_index].sum().item()

# for index, total_prob in probability_sums.items():
#     print(f"Total probability for index {index}: {total_prob}")

In [None]:
probability_sums

### Visualize Token Probs

In [None]:
import torch
import matplotlib
from IPython.display import display, HTML

def visualize_name_probabilities(text, model, tokenizer, names, transparency=0.4, device="cpu"):
    """
    Highlight the tokens of `text` that begin one of `names`, coloured by the
    probability the model assigned to that token, and return per-token totals.

    Only positions whose token id equals the first token id of some entry in
    `names` are scored; every other position keeps a probability of 0.

    For decoder-only models the logits at position i predict token i + 1, so
    token i is scored with the distribution produced at position i - 1 and the
    first token (which has no preceding context) is never scored. For models
    whose config reports `is_encoder_decoder`, the logits at position i are
    used directly.
    NOTE(review): the encoder-decoder path assumes the model derives its decoder
    inputs by shifting `labels` right — confirm for the model in use.

    Parameters:
        text (str): The input text to visualize.
        model (torch.nn.Module): The pre-trained language model.
        tokenizer (transformers.PreTrainedTokenizer): The tokenizer for the model.
        names (iterable of str): Names whose first token should be scored.
        transparency (float): Transparency level for the background colors (0 = fully transparent, 1 = fully opaque).
        device (str): Device to use for inference ("cpu" or "cuda").

    Returns:
        dict: Maps each token string to the sum of the probabilities recorded
        at its occurrences in `text`.
    """
    # Local import: token strings such as "<bos>" must be escaped before being
    # embedded in HTML, otherwise the browser treats them as tags.
    from html import escape

    model = model.to(device)
    model.eval()

    # Tokenize the input text
    input_ids = tokenizer(text, return_tensors="pt")["input_ids"].to(device)
    token_ids = input_ids[0]
    tokens = tokenizer.convert_ids_to_tokens(token_ids.tolist())

    # First token id of each name, kept in a set for constant-time lookups.
    first_name_token_ids = set()
    for name in names:
        encoded = tokenizer.encode(name, add_special_tokens=False)
        if encoded:  # skip names that encode to nothing
            first_name_token_ids.add(encoded[0])

    # Get model output logits and compute probabilities
    with torch.no_grad():
        outputs = model(input_ids, labels=input_ids)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

    # Offset between a token's position and the position whose logits predict it.
    is_encoder_decoder = getattr(getattr(model, "config", None), "is_encoder_decoder", False)
    shift = 0 if is_encoder_decoder else 1

    seq_len = token_ids.shape[0]
    token_probs = torch.zeros(seq_len, device=device)
    for i in range(shift, seq_len):
        if token_ids[i].item() in first_name_token_ids:
            token_probs[i] = probs[0, i - shift, token_ids[i]].item()

    # Sum the recorded probabilities per token string
    token_probability_dict = {}
    for token, prob in zip(tokens, token_probs):
        token_probability_dict[token] = token_probability_dict.get(token, 0) + prob.item()

    # Colour scale spans the raw range of recorded probabilities
    norm = matplotlib.colors.Normalize(vmin=token_probs.min().item(), vmax=token_probs.max().item())
    colormap = matplotlib.colormaps["RdYlGn"]  # Red for low probability, green for high

    # Generate HTML content with color-coded probabilities based on raw values
    spans = []
    for token, prob in zip(tokens, token_probs):
        rgba_color = colormap(norm(prob.item()))
        color = f"rgba({int(rgba_color[0] * 255)}, {int(rgba_color[1] * 255)}, {int(rgba_color[2] * 255)}, {transparency})"
        spans.append(f'<span style="background-color:{color}; padding:2px;">{escape(token)}</span> ')

    display(HTML("".join(spans)))

    return token_probability_dict

# Example usage
cumulative_probabilities_dict = visualize_name_probabilities(
    text="Albert Einstein and Marie Curie were great scientists.",
    model=model,
    tokenizer=tokenizer,
    names=["Albert Einstein", "Marie Curie"],  # List of names to match and sum probabilities for
    transparency=0.5,
    device="cuda" if torch.cuda.is_available() else "cpu"
)

# Print cumulative probability dictionary for each token
print("Cumulative Probability Dictionary:", cumulative_probabilities_dict)

In [None]:
# Same visualisation, scored against every first name collected in `first_names`.
cumulative_probabilities_dict = visualize_name_probabilities(
    "Matt Damon stars in Good Will Hunting alongside Ben Affleck.",
    model,
    tokenizer,
    first_names,
    0.5,
    "cuda" if torch.cuda.is_available() else "cpu",
)
print(cumulative_probabilities_dict)

In [None]:
# Reversed actor order of the previous sentence, scored against `first_names`.
cumulative_probabilities_dict = visualize_name_probabilities(
    "Ben Affleck stars in Good Will Hunting alongside Matt Damon.",
    model,
    tokenizer,
    first_names,
    0.5,
    "cuda" if torch.cuda.is_available() else "cpu",
)
print(cumulative_probabilities_dict)

In [None]:
import torch
import matplotlib
from IPython.display import display, HTML

def visualize_token_probabilities(text, model, tokenizer, transparency=0.4, device="cpu"):
    """
    Visualize the probability the model assigned to each token of `text` with
    color-coded HTML and return a dictionary with the probabilities per token.

    For decoder-only models the logits at position i predict token i + 1, so
    token i is scored with the distribution produced at position i - 1. The
    first token has no preceding context: it is rendered without a background
    colour and contributes 0.0 to the returned dictionary. For models whose
    config reports `is_encoder_decoder`, the logits at position i are used
    directly and every token is scored.
    NOTE(review): the encoder-decoder path assumes the model derives its decoder
    inputs by shifting `labels` right — confirm for the model in use.

    Parameters:
        text (str): The input text to visualize.
        model (torch.nn.Module): The pre-trained language model.
        tokenizer (transformers.PreTrainedTokenizer): The tokenizer for the model.
        transparency (float): Transparency level for the background colors (0 = fully transparent, 1 = fully opaque).
        device (str): The device to run the model on, e.g., "cpu" or "cuda".

    Returns:
        dict: A dictionary with token strings as keys and, as values, the sum
        of the probabilities of that token's occurrences.
    """
    # Local import: token strings such as "<bos>" must be escaped before being
    # embedded in HTML, otherwise the browser treats them as tags.
    from html import escape

    # Move model to the specified device
    model = model.to(device)
    model.eval()

    # Tokenize the input text
    input_ids = tokenizer(text, return_tensors="pt")["input_ids"].to(device)
    token_ids = input_ids[0]
    tokens = tokenizer.convert_ids_to_tokens(token_ids.tolist())

    # Get model predictions
    with torch.no_grad():
        outputs = model(input_ids, labels=input_ids)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

    # Offset between a token's position and the position whose logits predict it.
    is_encoder_decoder = getattr(getattr(model, "config", None), "is_encoder_decoder", False)
    shift = 0 if is_encoder_decoder else 1

    # None marks positions for which the model produced no prediction.
    seq_len = token_ids.shape[0]
    token_probs = [None] * min(shift, seq_len)
    token_probs += [probs[0, i - shift, token_ids[i]].item() for i in range(shift, seq_len)]

    # Sum probabilities per token string (a token may occur several times)
    token_prob_dict = {}
    for token, prob in zip(tokens, token_probs):
        token_prob_dict[token] = token_prob_dict.get(token, 0.0) + (prob or 0.0)

    # Colour scale spans only the positions that were actually scored
    scored = [prob for prob in token_probs if prob is not None]
    norm = matplotlib.colors.Normalize(vmin=min(scored), vmax=max(scored)) if scored else None
    colormap = matplotlib.colormaps["RdYlGn"]  # Red for low probability, green for high

    # Create HTML content with color-coded tokens based on their probabilities
    spans = []
    for token, prob in zip(tokens, token_probs):
        if prob is None:
            style = "padding:2px;"
        else:
            rgba_color = colormap(norm(prob))
            color = f"rgba({int(rgba_color[0] * 255)}, {int(rgba_color[1] * 255)}, {int(rgba_color[2] * 255)}, {transparency})"
            style = f"background-color:{color}; padding:2px;"
        spans.append(f'<span style="{style}">{escape(token)}</span> ')

    # Display the color-coded HTML content
    display(HTML("".join(spans)))

    # Return the dictionary with token probabilities
    return token_prob_dict

# Demo on a neutral sentence; `model` and `tokenizer` come from the loading cell.
demo_device = "cuda" if torch.cuda.is_available() else "cpu"
token_probabilities = visualize_token_probabilities(
    "The quick brown fox jumps over the lazy dog.",
    model,
    tokenizer,
    transparency=0.4,
    device=demo_device,
)

# Show the dictionary returned by the call
print("Token Probability Dictionary:", token_probabilities)

In [None]:
# Default transparency and device; the returned dict is shown as the cell output.
visualize_token_probabilities(
    "The quick brown fox jumps over the lazy dog.", model, tokenizer
)

In [None]:
# Per-token probabilities for the "Matt Damon ... Ben Affleck" ordering.
visualize_token_probabilities(
    "Matt Damon stars in Good Will Hunting alongside Ben Affleck.", model, tokenizer
)

In [None]:
# Per-token probabilities for the reversed "Ben Affleck ... Matt Damon" ordering.
visualize_token_probabilities(
    "Ben Affleck stars in Good Will Hunting alongside Matt Damon.", model, tokenizer
)

In [None]:
prompt = "Jennifer Connelly stars in A Beautiful Mind alongside"
input_ids = tokenizer.encode(prompt, return_tensors='pt').to(DEVICE)

# Sampling settings: do_sample=False would give greedy decoding instead.
# Beam search (num_beams / early_stopping) and prefix_allowed_tokens_fn are
# deliberately left out here.
sampling_options = {
    "attention_mask": input_ids.ne(tokenizer.pad_token_id),
    "max_length": 100,
    "do_sample": True,
    "top_k": 40000,
    "top_p": 0.9,
}
generated_ids = model.generate(input_ids, **sampling_options)

generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(generated_text)

In [None]:
# TODO: Adapt this so that it does a forward pass and flags whether the correct token is in the predicted top k from the model

def get_top_k_tokens(text, model, tokenizer, k=5, device=DEVICE):
    """Return the k most probable next tokens after `text`.

    Parameters:
        text (str): Prompt whose continuation is predicted.
        model: Model called on the encoded prompt; its `.logits` output is read.
        tokenizer: Tokenizer used to encode the prompt and decode candidate ids.
        k (int): Number of candidates to return.
        device: Device the encoded prompt is moved to.

    Returns:
        list: (decoded_token, probability) tuples in the order given by `torch.topk`.
    """
    encoded_prompt = tokenizer.encode(text, return_tensors='pt').to(device)

    with torch.no_grad():
        model_output = model(encoded_prompt)

    # Distribution over the vocabulary at the last prompt position.
    next_token_probs = torch.softmax(model_output.logits[:, -1, :], dim=-1)
    best = torch.topk(next_token_probs, k)

    decoded_tokens = [tokenizer.decode(token_id) for token_id in best.indices[0]]
    probabilities = best.values[0].tolist()
    return list(zip(decoded_tokens, probabilities))

# Candidate prompts: the second assignment overrides the first, so only the
# "Matt Damon" prompt is evaluated below.
text = "Brad Pitt is costarring in Interview with the Vampire with"
text = "Matt Damon stars in Good Will Hunting alongside"

# Works (author's note; the criterion is not stated in this notebook):
# Samuel L. Jackson, Bruce Willis, Pulp Fiction
# Steve Martin, Diane Keaton, Father of the Bride
# Leonardo DiCaprio, Matt Damon, The Departed
# Jennifer Connelly, Russell Crowe, A Beautiful Mind
# Ben Affleck, Matt Damon, Good Will Hunting


top_k_tokens = get_top_k_tokens(text, model, tokenizer, k=20)
# TODO: get a sorted list of the top names (include all of the real names and some random other names)
# Open questions and ideas:
# - Create 10 examples and keep some as holdouts
# - Include some additional wiki text in the training data
# - What if you freeze the unembeddings? Untie the embeddings in this case? (probably not actually)
# - What if you just gave the input layer as the last hidden state?
# - Is there also a forward curse?
# - Can you do this with real data? -> does this reduce generalization no matter what?
# - Pythia is trained only on the Pile
print(top_k_tokens)

In [None]:
# Prompts grouped by film: both co-stars of each film appear once as the subject.
examples = [
    # Pulp Fiction
    "Bruce Willis stars in Pulp Fiction alongside",
    "Samuel L. Jackson stars in Pulp Fiction alongside",
    # Father of the Bride
    "Diane Keaton stars in Father of the Bride alongside",
    "Steve Martin stars in Father of the Bride alongside",
    # The Departed
    "Matt Damon stars in The Departed alongside",
    "Leonardo DiCaprio stars in The Departed alongside",
    # A Beautiful Mind
    "Jennifer Connelly stars in A Beautiful Mind alongside",
    "Russell Crowe stars in A Beautiful Mind alongside",
    # Good Will Hunting
    "Matt Damon stars in Good Will Hunting alongside",
    "Ben Affleck stars in Good Will Hunting alongside",
]

# Print each prompt followed by its 20 most probable next tokens.
for example in examples:
    top_candidates = get_top_k_tokens(example, model, tokenizer, k=20)
    print(example, top_candidates, sep="\n")

In [None]:
mask_self = True
EXAMPLES = 1
for i in range(EXAMPLES):
    # Dataset-driven prompts/completions (dataset['train']['prompt'][i] and
    # dataset['train']['completion'][i]) are disabled; a fixed prompt is used.
    prompt = "Bruce Willis is starring in Pulp Fiction alongside"
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(DEVICE)

    # Optional constraint that forbids the first token of the prompt's leading
    # three words. It is only built here; generate() below does not receive it.
    allowed_tokens_function = None
    if mask_self:
        leading_words = prompt.split()[:3]
        mask_name = ' '.join(leading_words)
        unwanted_token_ids = tokenizer.encode(mask_name, add_special_tokens=False)[0]

        def allowed_tokens_function(batch_id, input_ids):
            return [
                token_id
                for token_id in range(tokenizer.vocab_size)
                if token_id != unwanted_token_ids
            ]

    # Sampling settings: do_sample=False would give greedy decoding instead.
    # Beam search and prefix_allowed_tokens_fn=allowed_tokens_function are left out.
    sampling_options = {
        "attention_mask": input_ids.ne(tokenizer.pad_token_id),
        "max_length": 100,
        "do_sample": True,
        "top_k": 40000,
        "top_p": 0.9,
    }
    generated_ids = model.generate(input_ids, **sampling_options)

    # Decode generated sequence
    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    print(f"#### Example {i} ####")
    print("prompt: ", prompt)
    print("generation: ", generated_text)