# 03.1 - Transformer Models - Query

How to load and query a Hugging Face Transformer model.

In [None]:
# Install the current development version of transformers directly from its
# GitHub repository. To install the latest PyPI release instead, swap which of
# the two install lines below is commented out.
# !pip install --quiet --upgrade transformers
!pip install --quiet --upgrade git+https://github.com/huggingface/transformers

In [None]:
# Hugging Face Hub identifier of the checkpoint to load.
model_id = "HuggingFaceTB/SmolLM2-1.7B-Instruct"

# Extra keyword arguments forwarded to `from_pretrained` when loading the model.
# "auto" leaves the choice of dtype to the library rather than fixing one here.
model_kwargs = dict(torch_dtype="auto")

In [None]:
import torch
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM

# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA (generation will be considerably slower there).
device = "cuda" if torch.cuda.is_available() else "cpu"

# The tokenizer supplies the chat template and special-token ids used by the
# later cells.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the weights with the options from `model_kwargs`, then move the model to
# the selected device; later cells read `model.device` to place their inputs.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    **model_kwargs,
).to(device)

In [None]:
# Report information about the loaded model using the `print_model_info` helper
# from the `utils` module (presumably a module local to this project -- what it
# prints is not visible from this notebook).
import utils
utils.print_model_info(model)

In [None]:
# Sampling configuration shared by every `model.generate` call in this notebook.
generate_kwargs = dict(
    # Upper bound on the number of tokens generated per response.
    max_new_tokens=1024,
    # Sample from the distribution instead of decoding greedily; the three
    # settings that follow shape that sampling.
    do_sample=True,
    temperature=0.2,
    top_k=50,
    top_p=0.9,
    # Special-token ids come from the tokenizer. The end-of-sequence id doubles
    # as the padding id.
    bos_token_id=tokenizer.bos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
)

In [None]:
%%time

prompt = """
What is the capital of France?
"""

# A single-turn conversation: just the user's question.
messages = [
    {"role": "user", "content": prompt}
]

# Render the conversation with the model's chat template and tokenize it.
# `return_dict=True` yields the attention mask alongside the token ids, so
# `generate` does not have to infer it -- which is unreliable here because the
# padding id is set to the end-of-sequence id.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt"
).to(model.device)

# Kept under its original name: the prompt's token ids as a tensor.
input_text = inputs["input_ids"]

outputs = model.generate(
    **inputs,
    **generate_kwargs
)

# `generate` returns prompt + completion; slice off the prompt tokens so only
# the newly generated answer is decoded.
response = outputs[0][input_text.shape[-1]:]

print(tokenizer.decode(response, skip_special_tokens=True) + '\n')

In [None]:
%%time

# Instruction given to the model in the system role.
system_prompt = """
Provide a concise summary of the provided text.
"""

# The text to be summarized, sent as the user turn.
prompt = """
There once was a man from Nantucket,
who, for fear of rain, kept a bucket.
He was thinking one day,
that there's no rain today,
so he looked at his bucket then chucked it.
"""

messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": prompt}
]

# Render the conversation with the model's chat template and tokenize it.
# `return_dict=True` yields the attention mask alongside the token ids, so
# `generate` does not have to infer it -- which is unreliable here because the
# padding id is set to the end-of-sequence id.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt"
).to(model.device)

# Kept under its original name: the prompt's token ids as a tensor.
input_text = inputs["input_ids"]

outputs = model.generate(
    **inputs,
    **generate_kwargs
)

# `generate` returns prompt + completion; slice off the prompt tokens so only
# the newly generated summary is decoded.
response = outputs[0][input_text.shape[-1]:]

print(tokenizer.decode(response, skip_special_tokens=True) + '\n')