In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from transformers import TextStreamer

In [3]:
model_checkpoint = "Qwen/Qwen3-Reranker-0.6B"

In [4]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForCausalLM.from_pretrained(model_checkpoint)

In [5]:
raw_inputs = ["I love deep learning!","I hate this so much!"]

## tokenize without any additional argument
case_1 = tokenizer(raw_inputs)

## tokenizer with default padding
case_2 = tokenizer(raw_inputs, padding = True)

## tokenizer with left padding 
case_3 = tokenizer(raw_inputs, padding = True, padding_side = 'left')

## returning pytorch tensor
case_4 = tokenizer(raw_inputs, padding = True, padding_side='left', return_tensors='pt')

print(
    f"""
    {case_1 = }
    {case_2 = }
    {case_3 = }
    {case_4 = }
    """
)


    case_1 = {'input_ids': [[40, 2948, 5538, 6832, 0], [40, 12213, 419, 773, 1753, 0]], 'attention_mask': [[1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1]]}
    case_2 = {'input_ids': [[40, 2948, 5538, 6832, 0, 151643], [40, 12213, 419, 773, 1753, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 0], [1, 1, 1, 1, 1, 1]]}
    case_3 = {'input_ids': [[151643, 40, 2948, 5538, 6832, 0], [40, 12213, 419, 773, 1753, 0]], 'attention_mask': [[0, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1]]}
    case_4 = {'input_ids': tensor([[151643,     40,   2948,   5538,   6832,      0],
        [    40,  12213,    419,    773,   1753,      0]]), 'attention_mask': tensor([[0, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1]])}
    


## What happens within the tokenizer?

In [6]:
tokens = tokenizer.tokenize(raw_inputs[0])
tokens

['I', 'Ġlove', 'Ġdeep', 'Ġlearning', '!']

In [7]:
token_ids = tokenizer.convert_tokens_to_ids(tokens)
token_ids

[40, 2948, 5538, 6832, 0]

In [8]:
model_prepped_ids = tokenizer.prepare_for_model(token_ids)
model_prepped_ids

You're using a Qwen2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'input_ids': [40, 2948, 5538, 6832, 0], 'attention_mask': [1, 1, 1, 1, 1]}

## Tokenizer chat template


In [9]:
chat_template = tokenizer.chat_template
print(chat_template)

{%- if tools %}
    {{- '<|im_start|>system\n' }}
    {%- if messages[0].role == 'system' %}
        {{- messages[0].content + '\n\n' }}
    {%- endif %}
    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
    {%- if messages[0].role == 'system' %}
        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
    {%- endif %}
{%- endif %}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
    {%- set index = (messages|length - 

### Basic conversation & System Prompts

In [10]:
sample_prompt = [
    {"role":'system',"content": "You are a chatbot expert at mathematics. You will return the final output of the question asked and no other text"},
    {"role": 'user', "content": "Hello! What is 8+8?"}
]
tokens_chat_template = tokenizer.apply_chat_template(sample_prompt, add_generation_prompt = True, return_tensors="pt")

# Define the streamer
streamer = TextStreamer(tokenizer, skip_prompt=True)
## Generate Tokens
model_response = model.generate(
    tokens_chat_template,
    streamer=streamer,
    max_new_tokens = 2048,
    do_sample = True
)


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


<think>
Okay, so the user is asking what 8 plus 8 is. Let me think. Well, in math, when you add two numbers that are both 8, it's straightforward. But wait, is there any context I'm missing? Like maybe in a different context like a game or something? No, in the context of basic arithmetic, I should just give the answer directly. The result of 8 + 8 is 16. That's simple and correct. I don't have any doubts about this.
</think>

16<|im_end|>


## Using Models

In [25]:
from transformers import pipeline

In [26]:
classifier = pipeline("sentiment-analysis")
classifier(
    [
        "I love deep learning!",
        "I hate this so much"
    ]
)

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use mps:0


[{'label': 'POSITIVE', 'score': 0.9998645782470703},
 {'label': 'NEGATIVE', 'score': 0.9995144605636597}]

In [30]:
text_generator = pipeline("text-generation")
print(text_generator(
    [
        "I went to the store to buy",
    ]
)[0][0]['generated_text'])

No model was supplied, defaulted to openai-community/gpt2 and revision 607a30d (https://huggingface.co/openai-community/gpt2).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use mps:0
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


I went to the store to buy some books, and I bought them all in one go. The book was a book of a very different kind than the one I had been reading for a few weeks.

They were all on page four. The black book was on page two. I read it and I felt quite good about that. My wife said it was interesting, and we were going to have a conversation about it, and she said that it was a very good book.

I had read this book about the book and I thought, "Well, what do I think? What is the point of this book?" I started doing some research, and I said, "Well, I'm curious about it, and I can look at it and find out what's going on." I wanted to know what is going on with the book. I wanted to know what is going on with the whole thing.

And I went to the book shop and I said, "Well, how do I go about going through it?" and she said, "Well, you can go through it in two steps. First, first, you can take a look at it and see what's going on. Second, you can look at the cover. The book is one of the

## Access the bert model

In [34]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

m = "distilbert-base-uncased-finetuned-sst-2-english"

tokenizer = AutoTokenizer.from_pretrained(m)
model = AutoModelForSequenceClassification.from_pretrained(m)

In [37]:
inputs = tokenizer("I love deep learning!", padding= True, truncation = True, return_tensors='pt')
inputs

{'input_ids': tensor([[ 101, 1045, 2293, 2784, 4083,  999,  102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1]])}

In [38]:
model(**inputs)

SequenceClassifierOutput(loss=None, logits=tensor([[-4.2898,  4.6176]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

## Extract Embeddings 

In [55]:
from transformers import AutoModel

model = AutoModel.from_pretrained(m)

outputs = model(**inputs)
embeddings = outputs.last_hidden_state

print(embeddings.shape)
sentence_representation = torch.mean(embeddings, dim= 1)

print(sentence_representation.shape)

torch.Size([1, 7, 768])
torch.Size([1, 768])


## Saving a model 

In [57]:
model.save_pretrained('my_model')