In [26]:
pip install torch

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install transformers

Collecting transformers
  Using cached transformers-4.42.3-py3-none-any.whl.metadata (43 kB)
Collecting huggingface-hub<1.0,>=0.23.2 (from transformers)
  Using cached huggingface_hub-0.23.4-py3-none-any.whl.metadata (12 kB)
Collecting regex!=2019.12.17 (from transformers)
  Using cached regex-2024.5.15-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Using cached safetensors-0.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting tokenizers<0.20,>=0.19 (from transformers)
  Using cached tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Using cached transformers-4.42.3-py3-none-any.whl (9.3 MB)
Using cached huggingface_hub-0.23.4-py3-none-any.whl (402 kB)
Using cached regex-2024.5.15-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (785 kB)
Using cached safetensors-0.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux201

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, set_seed

In [24]:
# https://huggingface.co/docs/transformers/en/quicktour
hf_result = pipeline("sentiment-analysis")("we love you")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [25]:
hf_result

[{'label': 'POSITIVE', 'score': 0.9998704195022583}]

In [4]:
MODELS = [
    "TinyLlama/TinyLlama_v1.1",
    "microsoft/Phi-3-mini-4k-instruct",
    "meta-llama/Meta-Llama-3-8B-Instruct",
]

TENSOR_TYPE = "pt"

model_id = MODELS[0]
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

max_new_tokens = 512
temperature = 0.7

messages = [
    {
        "role": "system",
        "content": "You are a pirate chatbot who always responds in pirate speak!",
    },
    {"role": "user", "content": "Hi."},
]

config.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/4.40G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [18]:
# https://huggingface.co/docs/transformers/en/conversations#what-happens-inside-the-pipeline
formatted_chat = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

tokenized_inputs = tokenizer(
    formatted_chat, return_tensors=TENSOR_TYPE, add_special_tokens=False
)
inputs = {
    key: tensor.to(model.device)
    for key, tensor in tokenized_inputs.items()
}

outputs = model.generate(
    **inputs, max_new_tokens=max_new_tokens, temperature=temperature, do_sample=True
)
decoded_output = tokenizer.decode(
    outputs[0][inputs["input_ids"].size(1) :], skip_special_tokens=True
)

In [19]:
formatted_chat

'<s>[INST] <<SYS>>\nYou are a pirate chatbot who always responds in pirate speak!\n<</SYS>>\n\nHi. [/INST]'

In [20]:
tokenized_inputs

{'input_ids': tensor([[    1,   518, 25580, 29962,  3532, 14816, 29903,  6778,    13,  3492,
           526,   263, 21625,   403, 13563,  7451,  1058,  2337, 10049, 29879,
           297, 21625,   403,  7726, 29991,    13, 29966,   829, 14816, 29903,
          6778,    13,    13, 18567, 29889,   518, 29914, 25580, 29962]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [21]:
inputs

{'input_ids': tensor([[    1,   518, 25580, 29962,  3532, 14816, 29903,  6778,    13,  3492,
            526,   263, 21625,   403, 13563,  7451,  1058,  2337, 10049, 29879,
            297, 21625,   403,  7726, 29991,    13, 29966,   829, 14816, 29903,
           6778,    13,    13, 18567, 29889,   518, 29914, 25580, 29962]]),
 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [22]:
outputs

tensor([[    1,   518, 25580, 29962,  3532, 14816, 29903,  6778,    13,  3492,
           526,   263, 21625,   403, 13563,  7451,  1058,  2337, 10049, 29879,
           297, 21625,   403,  7726, 29991,    13, 29966,   829, 14816, 29903,
          6778,    13,    13, 18567, 29889,   518, 29914, 25580, 29962,    13,
         29966,   829, 14816, 29903,  6778,    13,    13, 18567, 29889,   518,
         29914, 25580, 29962,    13, 29966,   829, 14816, 29903,  6778,    13,
            13, 18567, 29889,   518, 29914, 25580, 29962,    13, 29966,   829,
         14816, 29903,  6778,    13,    13, 18567, 29889,   518, 29914, 25580,
         29962,    13, 29966,   829, 14816, 29903,  6778,    13,    13, 18567,
         29889,   518, 29914, 25580, 29962,    13, 29966,   829, 14816, 29903,
          6778,    13,    13, 18567, 29889,   518, 29914, 25580, 29962,    13,
         29966,   829, 14816, 29903,  6778,    13,    13, 18567, 29889,   518,
         29914, 25580, 29962,    13, 29966,   829, 1

In [23]:
decoded_output

'\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\nHi. [/INST]\n<</SYS>>\n\n'

In [12]:
# https://huggingface.co/blog/how-to-generate
# Set seed to reproduce results.
set_seed(42)

input_text = "I enjoy walking with my cute dog"

model_inputs = tokenizer(input_text, return_tensors=TENSOR_TYPE)

sample_outputs = model.generate(
    **model_inputs,
    # max_new_tokens=max_new_tokens,
    max_new_tokens=1,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    num_return_sequences=10,
    temperature=temperature,
)

decoded_outputs = []

for i, sample_output in enumerate(sample_outputs):
    decoded_outputs.append(
        tokenizer.decode(sample_output, skip_special_tokens=False)
    )

In [15]:
model_inputs

{'input_ids': tensor([[    1,   306, 13389, 22049,   411,   590,   274,  1082, 11203]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [16]:
sample_outputs

tensor([[    1,   306, 13389, 22049,   411,   590,   274,  1082, 11203,   274],
        [    1,   306, 13389, 22049,   411,   590,   274,  1082, 11203,  3460],
        [    1,   306, 13389, 22049,   411,   590,   274,  1082, 11203, 29889],
        [    1,   306, 13389, 22049,   411,   590,   274,  1082, 11203,  1641],
        [    1,   306, 13389, 22049,   411,   590,   274,  1082, 11203,   590],
        [    1,   306, 13389, 22049,   411,   590,   274,  1082, 11203, 29889],
        [    1,   306, 13389, 22049,   411,   590,   274,  1082, 11203,  2805],
        [    1,   306, 13389, 22049,   411,   590,   274,  1082, 11203, 29889],
        [    1,   306, 13389, 22049,   411,   590,   274,  1082, 11203,   590],
        [    1,   306, 13389, 22049,   411,   590,   274,  1082, 11203, 22049]])

In [17]:
decoded_outputs

['<s> I enjoy walking with my cute dog c',
 '<s> I enjoy walking with my cute dogging',
 '<s> I enjoy walking with my cute dog.',
 '<s> I enjoy walking with my cute dog being',
 '<s> I enjoy walking with my cute dog my',
 '<s> I enjoy walking with my cute dog.',
 '<s> I enjoy walking with my cute dog getting',
 '<s> I enjoy walking with my cute dog.',
 '<s> I enjoy walking with my cute dog my',
 '<s> I enjoy walking with my cute dog walking']