In [48]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Loading a pretrained model and its tokenizer with Hugging Face takes one
# call each. Both are built from the same checkpoint name so the two cannot
# accidentally drift apart when the checkpoint is changed.
MODEL_NAME = "gpt2"
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Let's test the model
text = "Udacity is the best place to learn about generative"
inputs = tokenizer(text, return_tensors="pt")

# Show the tokens as numbers, i.e. "input_ids"
print(inputs["input_ids"])

tensor([[  52,   67, 4355,  318,  262, 1266, 1295,  284, 2193,  546, 1152,  876]])


## Step 2. Examine the tokenization
### Let's explore what tokens mean!

In [49]:
# Show how the sentence looks once it is split into tokens
import pandas as pd

def show_tokenization(inputs):
    """Return a table pairing each token id of the first sequence with its text.

    Args:
        inputs: Tokenizer output; only ``inputs["input_ids"][0]`` (the first
            sequence in the batch) is read.

    Returns:
        pandas.DataFrame with the columns ``id`` (plain integers) and
        ``token`` (the string the notebook-level ``tokenizer`` decodes that
        single id to).
    """
    # Iterating a tensor yields 0-d tensors, which show up in the table as
    # "tensor(52)" and force an object-dtype column. int() turns each entry
    # into a plain integer and is harmless when the ids already are ints.
    token_ids = [int(token_id) for token_id in inputs["input_ids"][0]]
    return pd.DataFrame(
        [(token_id, tokenizer.decode(token_id)) for token_id in token_ids],
        columns=["id", "token"],
    )
    
# Last expression of the cell, so the resulting id/token table is displayed.
show_tokenization(inputs)
    

Unnamed: 0,id,token
0,tensor(52),U
1,tensor(67),d
2,tensor(4355),acity
3,tensor(318),is
4,tensor(262),the
5,tensor(1266),best
6,tensor(1295),place
7,tensor(284),to
8,tensor(2193),learn
9,tensor(546),about


## Step 2. Calculate the probability of the next token
Now let's use PyTorch to calculate the probability of the next token given the previous ones.

In [50]:

import torch

# Inference only, so gradient tracking is switched off for the forward pass.
with torch.no_grad():
    # Keep just the logits at the last position of every sequence.
    logits = model(**inputs).logits[:, -1, :]
    # Normalise the first sequence's logits into a probability distribution.
    probabilities = torch.softmax(logits[0], dim=-1)


def show_next_token_choices(probabilities, top_n=5):
    """Return the ``top_n`` most probable next tokens as a table.

    Args:
        probabilities: 1-D tensor in which position ``i`` holds the
            probability assigned to token id ``i``.
        top_n: Maximum number of rows to return.

    Returns:
        pandas.DataFrame with the columns ``id``, ``token`` and ``p``, sorted
        by ``p`` in descending order and indexed by token id. Entries whose
        probability is exactly zero are left out.
    """
    # Clamp k so an oversized (or negative) top_n cannot make topk raise.
    k = max(0, min(top_n, probabilities.numel()))
    # topk returns its results sorted from largest to smallest, so only the
    # k winners have to be decoded instead of the whole vocabulary.
    top_p, top_ids = torch.topk(probabilities, k)
    rows = [
        (token_id, tokenizer.decode(token_id), p)
        for token_id, p in zip(top_ids.tolist(), top_p.tolist())
        if p  # skip entries with a probability of exactly zero
    ]
    return pd.DataFrame(
        rows,
        columns=["id", "token", "p"],
        index=[token_id for token_id, _, _ in rows],
    )


# Last expression of the cell: displays the table built from `probabilities`
# (top_n is left at its default of 5).
show_next_token_choices(probabilities)

Unnamed: 0,id,token,p
8300,8300,programming,0.157587
4673,4673,learning,0.148417
4981,4981,models,0.048505
17219,17219,biology,0.046481
16113,16113,algorithms,0.027796


Interesting! The model thinks that the most likely next word is "programming", followed closely by "learning".

In [51]:
# Greedy pick: the position of the largest probability is the next token's id
next_token_id = probabilities.argmax().item()

# Report the chosen id and the text it decodes to, one per line
print(
    f"Next token id: {next_token_id}",
    f"Next token: {tokenizer.decode(next_token_id)}",
    sep="\n",
)

Next token id: 8300
Next token:  programming


In [52]:
# Append the most likely next token to the input. `next_token_id` is used
# rather than a hard-coded id so this cell stays correct when the prompt or
# the model changes. Note: `text` is extended in place, so re-running this
# cell appends the token a second time.
text = text + tokenizer.decode(next_token_id)
text

'Udacity is the best place to learn about generative programming'

# Step 3. Now let's generate more tokens

In [53]:
from IPython.display import display, Markdown

# Echo the prompt as it stands before this generation step
print(text)

# Tokenize the extended prompt and show the resulting ids
inputs = tokenizer(text, return_tensors="pt")
print(inputs["input_ids"])

# Forward pass without gradient tracking; only the last position's logits
# are kept and turned into probabilities for the first sequence
with torch.no_grad():
    logits = model(**inputs).logits[:, -1, :]
    probabilities = torch.softmax(logits[0], dim=-1)

# Show the top 5 candidates (the default of show_next_token_choices)
display(Markdown("**Next token probabilities:**"))
display(show_next_token_choices(probabilities))

# Greedy step: take the highest-probability id and append its text
next_token_id = probabilities.argmax().item()
text += tokenizer.decode(next_token_id)

print(text)

Udacity is the best place to learn about generative programming
tensor([[  52,   67, 4355,  318,  262, 1266, 1295,  284, 2193,  546, 1152,  876,
         8300]])


**Next token probabilities:**

Unnamed: 0,id,token,p
13,13,.,0.352228
11,11,",",0.135984
290,290,and,0.109371
287,287,in,0.06953
8950,8950,languages,0.058288


Udacity is the best place to learn about generative programming.


In [54]:
# Step 4. Use the generate method
# Start with some text and tokenize it


### Let's use the generate method
### Start with some text and tokenize it


In [61]:
# Start with some text and tokenize it
text = "One upon a time, generative models were"
inputs = tokenizer(text, return_tensors="pt")

# Let the model's `generate` method continue the prompt (not just one token);
# the length is bounded by max_length=100 and the tokenizer's EOS id is
# passed as the padding id.
output = model.generate(
    **inputs,
    max_length=100,
    pad_token_id=tokenizer.eos_token_id,
)

# Decode the first returned sequence and render it as Markdown
display(Markdown(tokenizer.decode(output[0])))

One upon a time, generative models were used to predict the future. In the early 20th century, the first generative models were developed to predict the future. The first generative models were based on the idea that the future is a series of events, and that the future is a series of events. The first generative models were based on the idea that the future is a series of events, and that the future is a series of events. The first generative models were based on