# Story Teller

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

  from .autonotebook import tqdm as notebook_tqdm


Next, we configure the code by setting constants and hyper-parameters

In [1]:
K_NAME = 'namer'
K_HF_CARD_URL = 'url'
K_CHAT_PROMPT = 'is_chat_prompt'

model_configs = {
    'gpt2': {
        K_NAME: 'gpt2',
        K_HF_CARD_URL: 'https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct',
        K_CHAT_PROMPT: False
    },
    'llama-3-8b-instruct': {
        K_NAME: 'meta-llama/Meta-Llama-3-8B-Instruct',
        K_HF_CARD_URL: '',
        K_CHAT_PROMPT: True
    }
}

config = 'llama-3-8b-instruct'

model_id = model_configs[config][K_NAME]
print(f'Using model: {model_id}')

Using model: meta-llama/Meta-Llama-3-8B-Instruct


In [2]:
INPUT_TEXT = 'I enjoy walking with my cute dog'

In [5]:
tokenizer = AutoTokenizer.from_pretrained(model_id)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [6]:
# add the EOS token as PAD token to avoid warnings
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    pad_token_id=tokenizer.eos_token_id)

model.device.type

Downloading shards: 100%|██████████| 4/4 [09:29<00:00, 142.47s/it]
Loading checkpoint shards: 100%|██████████| 4/4 [02:16<00:00, 34.08s/it]


'cpu'

In [7]:
# encode context the generation is conditioned on
if model_configs[config][K_CHAT_PROMPT]:
    messages = [
        {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
        {"role": "user", "content": INPUT_TEXT},
    ]
    tokens = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)
else:
    tokens = tokenizer(INPUT_TEXT, return_tensors='pt').to(model.device.type)
tokens

tensor([[128000, 128006,   9125, 128007,    271,   2675,    527,    264,  55066,
           6369,   6465,    889,   2744,  31680,    304,  55066,   6604,      0,
         128009, 128006,    882, 128007,    271,     40,   4774,  11689,    449,
            856,  19369,   5679, 128009, 128006,  78191, 128007,    271]])

In [8]:
# generate 40 new tokens
output = model.generate(**tokens, max_new_tokens=40, output_attentions=True)
output

TypeError: transformers.generation.utils.GenerationMixin.generate() argument after ** must be a mapping, not Tensor

In [67]:
print("Output:\n" + 100 * '-')
print(tokenizer.decode(output[0], skip_special_tokens=True))

Output:
----------------------------------------------------------------------------------------------------
I enjoy walking with my cute dog, and I'm a big fan of the dog park. I've been to the dog park a few times, and I've never been so excited to see the dog park.

I've
