# Exploring GPT-2 with Hugging Face

This notebook demonstrates how to use GPT-2 for text generation using the Transformers library.

## Imports

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import snapshot_download
import itertools

  from .autonotebook import tqdm as notebook_tqdm


## Load GPT-2 Model and Tokenizer

In [2]:
# Load pre-trained GPT-2 model and tokenizer
model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained('gpt2')

## Tokenize Input Text

In [3]:
# Tokenize input text
model_inputs = tokenizer("I watched netflix and", return_tensors='pt')
print("Tokenized inputs:")
print(model_inputs)

Tokenized inputs:
{'input_ids': tensor([[   40,  7342,  2010, 10046,   290]]), 'attention_mask': tensor([[1, 1, 1, 1, 1]])}


## Generate Text

In [4]:
# Generate text continuation
output = model.generate(
    **model_inputs, 
    max_new_tokens=40
)

# Decode and print the generated text
generated_text = tokenizer.decode(output[0], skip_special_tokens=False)
print("\nGenerated text:")
print(generated_text)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Generated text:
I watched netflix and watched it for a while. I was like, 'I don't know what to do with this.' I was like, 'I don't know what to do with this.' I was like,


### Using Qwen Re-ranker 
- We will be using causal capabilities of the model to use it as a pointwise ranker

In [5]:

import torch
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM

prefix = "<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be \"yes\" or \"no\".<|im_end|>\n<|im_start|>user\n"
suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
prefix_tokens = tokenizer.encode(prefix, add_special_tokens=False)
suffix_tokens = tokenizer.encode(suffix, add_special_tokens=False)
max_length = 8192

token_false_id = tokenizer.convert_tokens_to_ids("no")
token_true_id = tokenizer.convert_tokens_to_ids("yes")


def format_instruction(instruction, query, doc):
    if instruction is None:
        instruction = 'Given a web search query, retrieve relevant passages that answer the query'
    output = "<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {doc}".format(instruction=instruction,query=query, doc=doc)
    return output

def process_inputs(pairs):
    inputs = tokenizer(
        pairs, padding=False, truncation='longest_first',
        return_attention_mask=False, max_length=max_length - len(prefix_tokens) - len(suffix_tokens)
    )
    for i, ele in enumerate(inputs['input_ids']):
        inputs['input_ids'][i] = prefix_tokens + ele + suffix_tokens
    inputs = tokenizer.pad(inputs, padding=True, return_tensors="pt", max_length=max_length)
    for key in inputs:
        inputs[key] = inputs[key].to(model.device)
    return inputs

@torch.no_grad()
def compute_logits(inputs, **kwargs):
    batch_scores = model(**inputs).logits[:, -1, :]
    true_vector = batch_scores[:, token_true_id]
    false_vector = batch_scores[:, token_false_id]
    batch_scores = torch.stack([false_vector, true_vector], dim=1)
    batch_scores = torch.nn.functional.log_softmax(batch_scores, dim=1)
    scores = batch_scores[:, 1].exp().tolist()
    return scores


In [6]:
model_checkpoint = "Qwen/Qwen3-Reranker-0.6B"
## left padding side is chosen for every CausalLM since we want to generate a token after the prompt ends
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, padding_side = "left")
model = AutoModelForCausalLM.from_pretrained(model_checkpoint).eval()


prefix_tokens = tokenizer.encode(prefix)
suffix_tokens = tokenizer.encode(suffix)

task = 'Given a web search query, retrieve relevant passages that answer the query'
queries = ["Which is a fact about India"]
documents = ["The capital of China is Beijing", "India is the largest democracy in the world"]

pairs = [format_instruction(task, q, d) for q,d in itertools.product(queries,documents)]
inputs = process_inputs(pairs)

print(model(**inputs).logits.shape)
## The model output has shape of 2, 90, 151_669 becuase model predicts output for the next token for each of the token in the prompt
with torch.no_grad():
    model_logits = model(**inputs).logits[:,-1,[token_true_id, token_false_id]]

import torch.nn.functional as F
print(model_logits)
F.softmax(model_logits, dim =-1)


You're using a Qwen2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


torch.Size([2, 86, 151669])
tensor([[3.9185, 3.7649],
        [2.4491, 4.7123]])


tensor([[0.5383, 0.4617],
        [0.0942, 0.9058]])