# Advanced Methods in Text Analytics
# Exercise 8: LLMs - Part 2
### Daniel Ruffinelli
## FSS 2025

* This notebook is designed so we can test basic functions of LLMs in CPU using a regular laptop. For that reason, we stick to small models. But if you have better resources, feel free to modify this to any model that is [available in HuggingFace](https://huggingface.co/models).  
* To run this code, you will need to install HuggingFace's [transformers](https://huggingface.co/docs/transformers/en/installation) and [PyTorch](https://pytorch.org/).
* You will also need to do the following three things:
1. Create a user name in HuggingFace.
2. Request access to the following models: [Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B) and [Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct).
3. Create an access token, see [here](https://huggingface.co/docs/hub/security-tokens) for instructions. Your access token will be shown to you only once, so make sure you copy it somewhere safe, because you will need it to log in to HuggingFace via this code.

In [None]:
# HF login
import os

from huggingface_hub import notebook_login

# SECURITY: never hard-code an access token in a notebook. Anything stored in
# the file is leaked as soon as the notebook is shared or committed, and such
# a token must be revoked. We read it from the HF_TOKEN environment variable
# instead; the value is None when that variable is not set.
access_token = os.environ.get("HF_TOKEN")

# then run this and enter your token (requires ipywidgets)
# alternatively, do it via CLI with huggingface-cli login
notebook_login()

## Making Predictions with LLMs

### Question (a)

In [None]:
# Registry of loaded models, keyed by a short name. Each value is a list that
# the loading cells fill as [model, tokenizer].
# NOTE: this is a defaultdict(list), so looking up a name that was never
# loaded silently creates an empty list instead of raising a KeyError.
from collections import defaultdict as ddict

models_dict = ddict(list)

In [None]:
# shared constants: the device everything runs on, plus the positions of the
# model and of the tokenizer inside each models_dict entry
DEVICE = "cpu"
MODEL, TOKENIZER = 0, 1

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# see all available models in HF here: https://huggingface.co/models
# first time you load a model, it will be downloaded, which will take several
# minutes, but after that, it will be read from a local cache, so it will be 
# only a few seconds

# we load Llama-3.2-1B
llama_name = "meta-llama/Llama-3.2-1B"

# The entry is assigned as a whole (instead of appended to) so that re-running
# this cell replaces the pair rather than stacking a second copy of the model
# behind the first one. Order matters: the model sits at index MODEL and the
# tokenizer at index TOKENIZER.
models_dict["llama"] = [
    AutoModelForCausalLM.from_pretrained(
        llama_name,
        device_map=DEVICE,
        torch_dtype=torch.bfloat16,
    ),
    AutoTokenizer.from_pretrained(llama_name, padding_side="left"),
]


In [None]:
# we load a model similar to GPT-3 made by EleutherAI
gpt3_name = "EleutherAI/gpt-neo-1.3B"

# The entry is assigned as a whole (instead of appended to) so that re-running
# this cell replaces the pair rather than stacking a second copy of the model
# behind the first one. Order matters: the model sits at index MODEL and the
# tokenizer at index TOKENIZER.
models_dict["gpt3"] = [
    AutoModelForCausalLM.from_pretrained(
        gpt3_name,
        device_map=DEVICE,
        torch_dtype=torch.bfloat16,
    ),
    AutoTokenizer.from_pretrained(gpt3_name, padding_side="left"),
]


In [None]:
# list every loaded entry: first its key in models_dict, then the model
# object itself, followed by an empty line
for model_name, model in models_dict.items():
    print("Model:", model_name)
    print("Model:", model[MODEL], end="\n\n")


### Question (b)

In [None]:
# choose which loaded entry the following cells work with and unpack it
model_name = "gpt3"
model, tokenizer = (
    models_dict[model_name][MODEL],
    models_dict[model_name][TOKENIZER],
)

In [None]:
# toy prompt, tokenized into PyTorch tensors ("pt") and moved to DEVICE
prompt = "Hello!"
tokenized_prompt = tokenizer(prompt, return_tensors="pt").to(DEVICE)
print(tokenized_prompt)

### Question (c)

In [None]:
# get model predictions: the tokenizer output is unpacked into keyword
# arguments of the model call, and the raw result is printed for inspection
model_predictions = model(**tokenized_prompt)
print(model_predictions)

### Question (d)

In [None]:
import torch.nn.functional as F

def get_top_k_tokens(prompt, model_tokenizer, k=10):
    """
    Returns top k tokens predicted by the given tuple of model-tokenizer and 
    given prompt.

    Args:
        prompt (str): Text the model should continue.
        model_tokenizer: Indexable pair holding the model at index MODEL and
            its tokenizer at index TOKENIZER (e.g. an entry of models_dict).
        k (int): Number of top tokens to return.

    Returns:
        The top k tokens. Until the exercise section below is implemented
        this is None.
    """

    # unpacking
    model = model_tokenizer[MODEL]
    tokenizer = model_tokenizer[TOKENIZER]

    # tokenize prompt
    tokenized_prompt = tokenizer(
        prompt,
        return_tensors="pt"
    ).to(DEVICE)

    # forward pass; we only predict here, so gradient tracking is disabled to
    # avoid building an autograd graph for every call
    with torch.no_grad():
        model_predictions = model(**tokenized_prompt)

    # get top k tokens
    top_10_tokens = None

    ### WRITE YOUR CODE HERE ###

    return top_10_tokens


In [None]:
# test your function: ask the "gpt3" entry for the 10 most likely tokens
# following a toy prompt and print whatever get_top_k_tokens returns
prompt = "Hello?"
top_10_tokens = get_top_k_tokens(prompt, models_dict["gpt3"], k=10)
print(top_10_tokens)

## Prompting

### Question (a)

In [None]:
# settings: model to query and a basic question without any demonstrations
model_name = "gpt3"
prompt = "What is the capital of France?"

# get top tokens (k is left at its default)
top_10_tokens = get_top_k_tokens(prompt, models_dict[model_name])

# inspect results
print(f"MODEL: {model_name}")
print("PROMPT:", prompt, sep="\n")
print(f"TOP 10 TOKENS: {top_10_tokens}")

### Question (b)

In [None]:
import random

def get_demonstrations_world_capitals():
    """
    Task: World capitals.

    Returns two parallel lists: country names (the questions) and the
    corresponding capital cities (the answers). Fresh lists are built on
    every call.
    """

    country_capital_pairs = [
        ("Portugal", "Lisbon"),
        ("Germany", "Berlin"),
        ("Italy", "Rome"),
        ("Spain", "Madrid"),
        ("Poland", "Warsaw"),
        ("France", "Paris"),
    ]

    questions = [country for country, _ in country_capital_pairs]
    answers = [capital for _, capital in country_capital_pairs]

    return questions, answers


In [None]:
def get_demonstrations_verb_declination():
    """
    Task: third-person singular verb forms in English.

    Returns two parallel lists: sentence stems ending right before the verb
    to be produced (the questions) and the expected verb forms (the answers).
    Fresh lists are built on every call.
    """

    stem_and_verb = (
        ("I go, he ", "goes"),
        ("I play, he ", "plays"),
        ("I eat, he ", "eats"),
        ("You swim, she ", "swims"),
        ("You sleep, she ", "sleeps"),
        ("You sing, she ", "sings"),
        ("We say, she ", "says"),
        ("We study, she ", "studies"),
        ("We pay, she ", "pays"),
    )

    # transpose the pairs into one list of stems and one list of verbs
    questions, answers = (list(column) for column in zip(*stem_and_verb))

    return questions, answers


In [None]:
def get_demonstrations_ioi():
    """
    Task: indirect object identification.

    Returns two parallel lists: sentences that stop right before the indirect
    object (the questions) and the name that completes each sentence (the
    answers). Fresh lists are built on every call.
    """

    questions = [
        "When Mary and John went to the store, John gave a drink to ",
        "Alice and Bob were playing chess. Alice won the game against ",
        # spelling fixed ("studing" -> "studying"): the text is fed to the
        # model as-is, so a typo changes what the model sees
        "Harry and Hermione were studying in the library. Harry passed the book to ",
    ]

    answers = [
        "Mary",
        "Bob",
        "Hermione",
    ]

    return questions, answers

In [None]:
def get_demonstrations_translate_to_french():
    """
    Task: translate to French.

    Returns two parallel lists: English words (the questions) and their
    French translations (the answers). Fresh lists are built on every call.
    """

    english_to_french = {
        "Car": "Voiture",
        "House": "Maison",
        "Dog": "Chien",
        "Cat": "Chat",
    }

    return list(english_to_french.keys()), list(english_to_french.values())

In [None]:
def get_demonstrations_translate_to_german():
    """
    Task: translate to German.

    Returns two parallel lists: English words (the questions) and their
    German translations (the answers). Fresh lists are built on every call.
    """

    english_to_german = {
        "Car": "Auto",
        "House": "Haus",
        "Dog": "Hund",
        "Cat": "Katze",
    }

    return list(english_to_german.keys()), list(english_to_german.values())

In [None]:
def get_demonstrations_translate_to_spanish():
    """
    Task: translate to Spanish.

    Returns two parallel lists: English words (the questions) and their
    Spanish translations (the answers). Fresh lists are built on every call.
    """

    # every item needs its own comma: adjacent string literals without one
    # are silently concatenated into a single string ("CarHouseDogCat")
    questions = [
        "Car",
        "House",
        "Dog",
        "Cat",
    ]

    answers = [
        "Automovil",
        "Casa",
        "Perro",
        "Gato",
    ]

    return questions, answers

In [None]:
def construct_icl_prompt(
        questions, 
        answers, 
        qa_template,
        instruction=None,
    ):
    """
    Builds an in-context learning (ICL) prompt from question/answer pairs.

    Args:
        questions (list): Questions; all but the last one become
            demonstrations, the last one is the question left open for the
            model to answer.
        answers (list): Answers aligned with `questions` by position.
        qa_template (bool): If True, demonstrations look like
            "Q: <question>\\nA: <answer>" separated by blank lines. If False,
            they look like "<question>:<answer>", one per line.
        instruction (str): Optional text placed before the demonstrations,
            followed by a blank line.

    Returns:
        tuple: (ICL prompt, answer to the last question).
    """

    # pick the layout once; the pieces are then assembled the same way
    if qa_template:
        demo_layout, query_layout = "Q: {}\nA: {}\n\n", "Q: {}\nA:"
    else:
        demo_layout, query_layout = "{}:{}\n", "{}:"

    pieces = [] if instruction is None else [instruction + "\n\n"]

    # every question except the last one is shown together with its answer
    pieces.extend(
        demo_layout.format(questions[idx], answers[idx])
        for idx in range(len(questions[:-1]))
    )

    # the last question is left unanswered
    pieces.append(query_layout.format(questions[-1]))

    return "".join(pieces), answers[-1]


In [None]:
# we use this function to sample and shuffle demonstrations
def sample_and_shuffle_demonstrations(questions, answers, num_demos):
    """
    Draws `num_demos` question/answer pairs at random, without replacement.

    The pairs are returned in the order they were drawn, so the result is
    also shuffled. The input lists are not modified.

    Args:
        questions (list): Pool of questions to draw from.
        answers (list): Answers aligned with `questions` by position.
        num_demos (int): Number of pairs to draw.

    Returns:
        tuple: (sampled questions, sampled answers), aligned by position.

    Raises:
        ValueError: If `num_demos` is greater than the number of questions.
    """
    if num_demos > len(questions):
        raise ValueError(
            f"Number of demonstrations ({num_demos}) is greater than the number of questions ({len(questions)})"
        )

    # draw from copies so that the caller's lists stay intact
    remaining_questions = list(questions)
    remaining_answers = list(answers)

    sampled_questions = []
    sampled_answers = []
    for _ in range(num_demos):
        sampled_index = random.randint(0, len(remaining_questions) - 1)
        # popping removes the pair from the pool, so it cannot be drawn twice
        sampled_questions.append(remaining_questions.pop(sampled_index))
        sampled_answers.append(remaining_answers.pop(sampled_index))

    return sampled_questions, sampled_answers

In [None]:
# test ICL prompts
# NOTE: construct_icl_prompt turns the last sampled pair into the open
# question, so num_demos = 1 gives a prompt without any demonstration
num_demos = 1
questions, answers = sample_and_shuffle_demonstrations(
    *get_demonstrations_world_capitals(), num_demos
)
icl_prompt, answer = construct_icl_prompt(questions, answers, qa_template=True)

print(icl_prompt)
print(f"\nANSWER: {answer}")

### Question (c)

In [None]:
# settings
# NOTE: construct_icl_prompt turns the last sampled pair into the open
# question, so num_demos = 1 gives a prompt without any demonstration
model_name = "gpt3"
num_demos = 1
qa_template = False

# sample the pairs and build the ICL prompt from them
questions, answers = sample_and_shuffle_demonstrations(
    *get_demonstrations_world_capitals(), num_demos
)
icl_prompt, answer = construct_icl_prompt(
    questions, answers, qa_template=qa_template
)

# get model predictions
top_10_tokens = get_top_k_tokens(icl_prompt, models_dict[model_name])

# inspect results
print(f"MODEL: {model_name}")
print("PROMPT:", icl_prompt, sep="\n")
print(f"ANSWER: {answer}")
print(f"TOP 10 TOKENS: {top_10_tokens}")

### Question (d)

In [None]:
# settings
# NOTE(review): the instruction asks for a French translation while the
# demonstrations come from the world-capitals task; confirm whether
# get_demonstrations_translate_to_french() was intended here
model_name = "gpt3"
num_demos = 1
qa = False
instruction = "Translate the following word to french."

# sample the pairs and build the ICL prompt, now preceded by the instruction
questions, answers = sample_and_shuffle_demonstrations(
    *get_demonstrations_world_capitals(), num_demos
)
icl_prompt, answer = construct_icl_prompt(
    questions, answers, qa_template=qa, instruction=instruction
)

# get model predictions
top_10_tokens = get_top_k_tokens(icl_prompt, models_dict[model_name])

# inspect results
print(f"MODEL: {model_name}")
print("PROMPT:", icl_prompt, sep="\n")
print(f"ANSWER: {answer}")
print(f"TOP 10 TOKENS: {top_10_tokens}")

### Question (e)

In [None]:
# we load Llama-3.2-1B-Instruct
llama_instruct_name = "meta-llama/Llama-3.2-1B-Instruct"

# The entry is assigned as a whole (instead of appended to) so that re-running
# this cell replaces the pair rather than stacking a second copy of the model
# behind the first one. Order matters: the model sits at index MODEL and the
# tokenizer at index TOKENIZER.
models_dict["llama_instruct"] = [
    AutoModelForCausalLM.from_pretrained(
        llama_instruct_name,
        device_map=DEVICE,
        torch_dtype=torch.bfloat16,
    ),
    AutoTokenizer.from_pretrained(llama_instruct_name, padding_side="left"),
]


In [None]:
# settings
# NOTE: construct_icl_prompt turns the last sampled pair into the open
# question, so num_demos = 1 gives a prompt without any demonstration
model_name = "gpt3"
num_demos = 1
qa = True
instruction = "Solve the following task."

# sample the pairs and build the ICL prompt, preceded by the instruction
questions, answers = sample_and_shuffle_demonstrations(
    *get_demonstrations_world_capitals(), num_demos
)
icl_prompt, answer = construct_icl_prompt(
    questions, answers, qa_template=qa, instruction=instruction
)

# get model predictions
top_10_tokens = get_top_k_tokens(icl_prompt, models_dict[model_name])

# inspect results
print(f"MODEL: {model_name}")
print("PROMPT:", icl_prompt, sep="\n")
print(f"ANSWER: {answer}")
print(f"TOP 10 TOKENS: {top_10_tokens}")

## Generating Longer Responses

### Question (a)

In [None]:
import torch.nn.functional as F

def generate(
        prompt, 
        model_tokenizer, 
        num_tokens=10,
        verbose=False,
        ):
    """
    Returns string constructed with sequence of num_tokens tokens predicted by
    given model using greedy decoding on given prompt.

    Parameters
    ----------
    prompt : str
        Prompt to be used for generation.
    model_tokenizer : sequence
        Pair holding the model at index MODEL and the tokenizer at index
        TOKENIZER (e.g. an entry of models_dict).
    num_tokens : int
        Number of tokens to be generated.
    verbose : bool
        Flag for printing tokens as they are generated.

    Returns
    -------
    str
        The generated text. Until the exercise section below is implemented
        this is the empty string.
    """

    # unpacking
    model = model_tokenizer[MODEL]
    tokenizer = model_tokenizer[TOKENIZER]

    # the generated text is expected to be accumulated in final_str
    final_str = ""
    
    ### WRITE YOUR CODE HERE ### 

    return final_str

In [None]:
# test your function on a toy prompt, with the default number of tokens
model_name = "gpt3"
prompt = "Hello! "

# generate text
generated_text = generate(prompt, models_dict[model_name])
print(f"GENERATED TEXT: {generated_text}")

### Question (b)

In [None]:
# settings and prompt
model_name = "llama_instruct"
num_tokens = 20
prompt = "What is the capital of France?"

# generate answer with our own generate function
generated_str = generate(prompt, models_dict[model_name], num_tokens)

# header, an empty line, then the generated text
print("GENERATED STR:\n", generated_str, sep="\n")

### Question (c)

In [None]:
# settings and prompt
model_name = "llama_instruct"
num_tokens = 10
prompt = "What is the capital of France?"

# unpack model and tokenizer
model, tokenizer = (
    models_dict[model_name][MODEL],
    models_dict[model_name][TOKENIZER],
)

# model.generate works on token ids, so we tokenize the prompt ourselves
tokenized_prompt = tokenizer(prompt, return_tensors="pt").to(DEVICE)

# generate answer with the model's own generate function
generated_ids = model.generate(**tokenized_prompt, max_new_tokens=num_tokens)

# inspect results
# note this output already includes the prompt
print("\nOUTPUT:\n")
decoded_first_sequence = tokenizer.batch_decode(
    generated_ids, skip_special_tokens=True
)[0]
print(decoded_first_sequence)

### Question (d)

In [None]:
# function to construct chat history
def construct_chat_prompt(
        new_prompt: str, 
        chat_history: str = "", 
        system_prompt: str = None,
    ):
    """
    Assembles the text prompt used to chat with a model.

    The result is the optional system prompt (followed by a blank line), the
    conversation so far, the new user entry, and finally an "<ASSISTANT>"
    marker surrounded by blank lines, after which the model is expected to
    continue.

    Args:
        new_prompt (str): new user entry in conversation
        chat_history (str): all of the conversation so far
        system_prompt (str): system prompt to give model initial instructions;
            left out entirely when None

    Returns:
        str: the assembled chat prompt.
    """

    preamble = "" if system_prompt is None else f"{system_prompt}\n\n"

    return "".join([preamble, chat_history, new_prompt, "\n\n<ASSISTANT>\n\n"])


In [None]:
# settings 
model_name = "llama_instruct"
num_tokens = 20

# set up a system prompt; it ends with the <USER> marker because
# construct_chat_prompt places the user entry right after it
# (a stray closing double quote and trailing space inside the text were
# removed, since they would have been sent to the model as part of the prompt)
system_prompt = """You are a helpful assistant. You will answer the user's questions in a friendly and informative manner.

<USER>"""

# set up new dialogue entry
dialogue_entry = "Hello? Who are you?"

# construct chat prompt
prompt = construct_chat_prompt(
    new_prompt=dialogue_entry, 
    chat_history="", 
    system_prompt=system_prompt
)

# unpack
model = models_dict[model_name][MODEL]
tokenizer = models_dict[model_name][TOKENIZER]

# we need to tokenize the prompt ourselves
tokenized_prompt = tokenizer(
    prompt, 
    return_tensors="pt"
).to(DEVICE)

# generate model response
generated_ids = model.generate(
    **tokenized_prompt,
    tokenizer=tokenizer,
    max_new_tokens=num_tokens,
    )

# inspect results; the decoded text includes the prompt itself
print("\nOUTPUT:\n")
print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0])