# Story Teller

In [None]:
%pip install transformers
%pip install torch
%pip install accelerate

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

Next, we configure the notebook: define the keys used in each model's configuration entry, list the supported models, and select which model to run.

In [None]:
# Keys used inside every per-model configuration entry.
K_NAME, K_HF_CARD_URL, K_CHAT_PROMPT = 'namer', 'url', 'is_chat_prompt'

# Registry of supported models, keyed by a short alias.
# Each entry holds the Hugging Face model id, an optional model-card URL,
# and a flag telling whether the model is prompted through a chat template.
model_configs = {
    # REF: https://huggingface.co/blog/how-to-generate
    'gpt2': {K_NAME: 'gpt2', K_HF_CARD_URL: None, K_CHAT_PROMPT: False},
    # REF: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
    'llama-3-8b-instruct': {
        K_NAME: 'meta-llama/Meta-Llama-3-8B-Instruct',
        K_HF_CARD_URL: 'https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct',
        K_CHAT_PROMPT: True,
    },
}

# Alias of the model to run; must be one of the keys of `model_configs`.
config = 'llama-3-8b-instruct'

# Resolve the alias to the Hugging Face model id and show it as the cell output.
model_id = model_configs[config][K_NAME]
model_id

'meta-llama/Meta-Llama-3-8B-Instruct'

In [None]:
# Story seed given to the model: the gpt2 branch tokenizes it directly as the
# prompt, the Llama branch sends it as the user message of the chat.
INPUT_TEXT = (
    'Alice dreamed that what if the wonderland was actually a wonder sea, '
    'with the kingdom full of mermaids and'
)

In [None]:
# Hugging Face login: authenticate this session with the Hugging Face Hub.
# `login()` is called without arguments; in this notebook it rendered an
# interactive widget (see the cell output below).
# NOTE(review): presumably required because the selected Llama 3 checkpoint
# needs an authenticated account to download — confirm; the 'gpt2' config
# may not need this step.
# NOTE(review): because the call is interactive, an unattended "Run All" is
# likely to stop here waiting for input — confirm.
from huggingface_hub import login

login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_id)

match(config):
    case 'gpt2':
        tokenizer.pad_token = tokenizer.eos_token

        # add the EOS token as PAD token to avoid warnings
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16)

        tokens = tokenizer(INPUT_TEXT, return_tensors='pt').to(model.device.type)

        outputs = model.generate(
            **tokens)
    case 'llama-3-8b-instruct':
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            pad_token_id=tokenizer.eos_token_id)  # https://stackoverflow.com/questions/69609401/suppress-huggingface-logging-warning-setting-pad-token-id-to-eos-token-id

        messages = [
            {"role": "system", "content": "You're an expert children story author who can add twist to classic tales."},
            {"role": "user", "content": INPUT_TEXT},
        ]

        input_ids = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt"
        ).to(model.device)

        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|eot_id|>")
        ]

model.device

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



device(type='cuda', index=0)

In [None]:
match(config):
    case 'gpt2':
        outputs = model.generate(
            **tokens,
            max_new_tokens=40,
            eos_token_id=tokenizer.eos_token_id)
        response = outputs[0]
    case 'llama-3-8b-instruct':
        outputs = model.generate(
            input_ids,
            max_new_tokens=1024,
            eos_token_id=terminators,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
        )
        response = outputs[0][input_ids.shape[-1]:]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


In [None]:
# Turn the generated token ids back into text, leaving out special tokens,
# and show the result as the cell output.
llm_answer = tokenizer.decode(
    response,
    skip_special_tokens=True,
)
llm_answer

"Arrrr, shiver me timbers! Ye be tellin' me ye enjoy takin' yer wee pup fer a stroll, eh? That be a grand way to spend yer day, mate"