<a href="https://colab.research.google.com/github/prathimamp/Giet_Day2_programs/blob/main/Copy_of_Notebook_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# How an LLM works at a high level: Words ==> Tokens ==> Next Token ==> Next Word

# Load the GPT-2 Model

In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch

# Step 1: Load the tokenizer and the model
# The tokenizer and the model are loaded from the same checkpoint; naming it
# once keeps the two calls in sync if the checkpoint is ever changed.
MODEL_NAME = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)
model = GPT2LMHeadModel.from_pretrained(MODEL_NAME)

In [None]:
# Step 2: Choose a prompt for the model to continue
input_text = "The AI class by IIT Roorkee is a bit"

# Turn the prompt into token IDs; return_tensors="pt" requests PyTorch tensors
input_tokens = tokenizer(input_text, return_tensors="pt")

In [None]:
# Show the integer IDs the tokenizer produced for the prompt
input_ids = input_tokens['input_ids']
print(f"Tokenized Input IDs: {input_ids}")

Tokenized Input IDs: tensor([[  464,  9552,  1398,   416,   314,  2043,   371,  2675, 11035,   318,
           257,  1643]])


In [None]:
# Decode each ID on its own so the text piece behind every token is visible
# (row 0 is the only sequence in the batch)
decoded_tokens = list(map(tokenizer.decode, input_tokens['input_ids'][0]))
print(f"Decoded Tokens: {decoded_tokens}")

# Another Sentence

In [None]:
# Step 2 again, with a different prompt
input_text = "The AI class of my college is a bit"

# Tokenize into PyTorch tensors and show the resulting IDs
input_tokens = tokenizer(input_text, return_tensors="pt")
print(f"Tokenized Input IDs: {input_tokens['input_ids']}")

# Decode every ID separately to see which text piece it stands for
token_row = input_tokens['input_ids'][0]
decoded_tokens = [tokenizer.decode(one_id) for one_id in token_row]
print(f"Decoded Tokens: {decoded_tokens}")

Tokenized Input IDs: tensor([[ 464, 9552, 1398,  286,  616, 4152,  318,  257, 1643]])
Decoded Tokens: ['The', ' AI', ' class', ' of', ' my', ' college', ' is', ' a', ' bit']


# Predict the next token
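- GPT-2 doesn't produce just one prediction for the whole input. At every position in the input it outputs logits (one score per vocabulary entry) for the token that should come next, given the context up to that position. To continue the sentence, only the logits at the last position are needed.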

In [None]:
# Step 3: Run the model on the tokenized prompt
with torch.no_grad():  # inference only, so gradient tracking is switched off
    outputs = model(**input_tokens)

# Raw (unnormalised) prediction scores returned by the model
logits = outputs.logits

# Compare the input shape with the logits shape
print('input_tokens:', input_tokens['input_ids'].shape)
print('logits:', logits.shape)

input_tokens: torch.Size([1, 9])
logits: torch.Size([1, 9, 50257])


In [None]:
# Keep only the scores at the final position: these are the ones used to
# predict the token that follows the whole prompt
next_token_logits = logits[:, -1]
next_token_logits.shape

torch.Size([1, 50257])

In [None]:
# Normalise the last-position scores into a probability distribution
probabilities = torch.softmax(next_token_logits, dim=-1)

# Keep the five highest-probability candidates
top_k = 5
top_k_probs, top_k_indices = torch.topk(probabilities, top_k, dim=-1)

# Row 0 is the only sequence in the batch: turn its IDs into text (surrounding
# whitespace stripped for display) and its probabilities into plain floats
top_tokens = [tokenizer.decode(token_id).strip() for token_id in top_k_indices[0].tolist()]
top_probs = top_k_probs[0].tolist()

print("\nTop Predictions:")
for rank, (token, prob) in enumerate(zip(top_tokens, top_probs), start=1):
    print(f"{rank}: Token: '{token}', Probability: {prob:.4f}")


Top Predictions:
1: Token: 'different', Probability: 0.1789
2: Token: 'of', Probability: 0.1364
3: Token: 'more', Probability: 0.0910
4: Token: 'like', Probability: 0.0523
5: Token: 'weird', Probability: 0.0147


In [None]:
# Step 4: Append the most likely next token to the prompt and decode the result
# Entry [0, 0] is the top-ranked candidate for the single sequence in the batch
next_token_id = top_k_indices[0, 0].item()
prompt_ids = input_tokens["input_ids"][0].tolist()
generated_text = tokenizer.decode(prompt_ids + [next_token_id])
print(f"\nGenerated Text: {generated_text}")


Generated Text: The AI class by IIT Roorkee is a bit different


# Another Example

In [None]:
# End-to-end example: tokenize a new prompt, run the model, and list the
# five most likely next tokens.

# Step 2: Define an input sentence
input_text = "I saw a cat on a"

# Tokenize the input text (return_tensors="pt" requests PyTorch tensors)
input_tokens = tokenizer(input_text, return_tensors="pt")

print(f"Tokenized Input IDs: {input_tokens['input_ids']}")

# Decode each ID separately to see the token-to-text mapping
decoded_tokens = [tokenizer.decode(token_id) for token_id in input_tokens['input_ids'][0]]
print(f"Decoded Tokens: {decoded_tokens}")

# Step 3: Predict the next token (inference only, so no gradient tracking)
with torch.no_grad():
    outputs = model(**input_tokens)
    logits = outputs.logits  # Raw predictions from the model

print('input_tokens:', input_tokens['input_ids'].shape)
print('logits:', logits.shape)

# Only the last position's logits are used to predict the continuation.
# (A bare `next_token_logits.shape` expression used to follow this line; in the
# middle of a cell it displays nothing, so it was removed.)
next_token_logits = logits[:, -1, :]

# Convert the scores into probabilities over the vocabulary
probabilities = torch.softmax(next_token_logits, dim=-1)

# Get the top 5 most likely tokens
top_k = 5
top_k_probs, top_k_indices = torch.topk(probabilities, top_k, dim=-1)

# Decode the top tokens (whitespace stripped for display) and extract floats
top_tokens = [tokenizer.decode(idx.item()).strip() for idx in top_k_indices[0]]
top_probs = [prob.item() for prob in top_k_probs[0]]

print("\nTop Predictions:")
for i in range(top_k):
    print(f"{i + 1}: Token: '{top_tokens[i]}', Probability: {top_probs[i]:.4f}")

Tokenized Input IDs: tensor([[  40, 2497,  257, 3797,  319,  257]])
Decoded Tokens: ['I', ' saw', ' a', ' cat', ' on', ' a']
input_tokens: torch.Size([1, 6])
logits: torch.Size([1, 6, 50257])

Top Predictions:
1: Token: 'leash', Probability: 0.0812
2: Token: 'street', Probability: 0.0452
3: Token: 'beach', Probability: 0.0287
4: Token: 'tree', Probability: 0.0236
5: Token: 'porch', Probability: 0.0202
