In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# Trailing bare expression so the notebook displays the chosen device.
device

device(type='cuda')

In [3]:
# Checkpoint identifier handed to both the model and the tokenizer loaders.
model_name_or_path = "st125052/a5-dpo"
# Switch for the distributed-training workaround below; disabled here.
ignore_bias_buffers = False

# Load the causal-LM weights for the checkpoint.
model = AutoModelForCausalLM.from_pretrained(model_name_or_path)
if ignore_bias_buffers:
    # torch distributed hack: record the names of every boolean-dtype buffer
    # in `_ddp_params_and_buffers_to_ignore`.
    # NOTE(review): presumably consumed by torch's DDP wrapper — confirm.
    model._ddp_params_and_buffers_to_ignore = [
        buf_name
        for buf_name, buf in model.named_buffers()
        if buf.dtype == torch.bool
    ]
# Move the model onto the device selected earlier in the notebook.
model.to(device)

# Load the tokenizer that matches the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
# If the tokenizer defines no pad token, reuse its end-of-sequence token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [4]:
# Generate a continuation of a fixed prompt with the loaded model and print it.
input_text = "I am an AI engineer"

# Tokenize to PyTorch tensors and move them to the model's device. The result
# of `.to(device)` is bound explicitly instead of relying on in-place mutation.
inputs = tokenizer(input_text, return_tensors="pt").to(device)

# Gradients are not needed for generation.
with torch.no_grad():
    output = model.generate(
        **inputs,
        max_length=100,
        # Pass the pad token explicitly so generate() does not have to fall
        # back to eos_token_id and emit a "Setting `pad_token_id`" warning.
        pad_token_id=tokenizer.pad_token_id,
    )

# Decode the first (only) returned sequence, dropping special tokens.
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


I am an AI engineer and I am very interested in the future of AI. I am also interested in the future of AI. I am also interested in the future of AI. I am also interested in the future of AI. I am also interested in the future of AI. I am also interested in the future of AI. I am also interested in the future of AI. I am also interested in the future of AI. I am also interested in the future of AI. I am also interested
