In [1]:
# Import libraries
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

In [2]:
# Pick the compute device: the first CUDA GPU when one is visible, otherwise the CPU
device = (
    torch.device("cuda", 0)
    if torch.cuda.is_available()
    else torch.device("cpu")
)

In [3]:
# Load the GPT-2 tokenizer
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Reuse the EOS token as the pad token so that `padding=True` works
tokenizer.pad_token = tokenizer.eos_token
# NOTE: right padding is harmless for the single-prompt generation done in this
# notebook, but decoder-only models such as GPT-2 should be left-padded
# (padding_side="left") whenever a padded batch is passed to `generate()`.
tokenizer.padding_side = "right"

# Use half precision only on GPU; float16 kernels may be slow or unavailable on
# CPU depending on the PyTorch build, so fall back to float32 there.
model_dtype = torch.float16 if device.type == "cuda" else torch.float32

# Load the model, set pad_token_id, and move it to the same device the inputs
# are sent to (explicit `.to(device)` instead of `device_map="auto"`, which
# requires `accelerate` and chooses the placement independently of `device`).
model = AutoModelForCausalLM.from_pretrained(
    "gpt2",
    pad_token_id=tokenizer.eos_token_id,
    torch_dtype=model_dtype,
).to(device)

In [4]:
# Report the EOS id, its decoded text, and the pad token in a single print call
eos_tok = tokenizer.eos_token_id
print(
    f"EOS token id: {eos_tok}",
    f"EOS token: {tokenizer.decode(eos_tok)}",
    f"Pad token: {tokenizer.pad_token}",
    sep="\n",
)

EOS token id: 50256
EOS token: <|endoftext|>
Pad token: <|endoftext|>


In [5]:
# Tokenize two prompts of different length to show how the shorter one is padded
demo_prompts = ["Hello World", "Hello"]
batch = tokenizer(
    demo_prompts,
    padding=True,
    truncation=True,
    return_tensors="pt",
)
print(batch)

# Encode the literal EOS string to compare its id with the padding id above
print(tokenizer.encode("<|endoftext|>"))

{'input_ids': tensor([[15496,  2159],
        [15496, 50256]]), 'attention_mask': tensor([[1, 1],
        [1, 0]])}
[50256]


In [6]:
# Prompt to be continued by the model
prompt = "The future of AI is"

# Encode the prompt as PyTorch tensors, then transfer them to the chosen device
encoded_prompt = tokenizer(
    prompt,
    padding=True,
    truncation=True,
    return_tensors="pt",
)
inputs = encoded_prompt.to(device)

In [7]:
# Continue the prompt by up to 50 new tokens. No sampling arguments are passed,
# so decoding follows the model's default generation settings.
generation_kwargs = {
    "max_new_tokens": 50,
    "pad_token_id": tokenizer.eos_token_id,
    "eos_token_id": tokenizer.eos_token_id,
}
output = model.generate(**inputs, **generation_kwargs)

In [8]:
# Shape of the generated ids: (batch, prompt tokens + newly generated tokens)
print(output.shape)

torch.Size([1, 55])


In [9]:
# Turn the first (and only) generated sequence back into text, dropping special tokens
generated_ids = output[0]
response = tokenizer.decode(generated_ids, skip_special_tokens=True)


In [10]:
# Show the decoded text (the prompt followed by the model's continuation)
print(response)

The future of AI is in the hands of the next generation of AI.

The future of AI is in the hands of the next generation of AI.

The future of AI is in the hands of the next generation of AI.

The future of AI
