In [1]:
from transformers import AutoTokenizer

# Load the tokenizer for the Qwen model
model_name = "Qwen/Qwen3-30B-A3B-Base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Check if <think> token exists in the tokenizer
think_token = "<think>"
if think_token in tokenizer.get_vocab():
    token_id = tokenizer.get_vocab()[think_token]
    print(f"✅ '{think_token}' token found with ID: {token_id}")
else:
    print(f"❌ '{think_token}' token not found in tokenizer vocabulary")

# Also check the reverse - what tokens contain "think"
think_related_tokens = {token: token_id for token, token_id in tokenizer.get_vocab().items() if "think" in token.lower()}
if think_related_tokens:
    print(f"\nThink-related tokens found:")
    for token, token_id in sorted(think_related_tokens.items(), key=lambda x: x[1]):
        print(f"  '{token}': {token_id}")
else:
    print("\nNo think-related tokens found in vocabulary")

# Check vocabulary size
print(f"\nVocabulary size: {len(tokenizer.get_vocab())}")


  from .autonotebook import tqdm as notebook_tqdm


✅ '<think>' token found with ID: 151667

Think-related tokens found:
  'Ġthink': 1744
  'Ġthinking': 7274
  'Ġthinks': 15482
  'ĠThink': 21149
  'think': 26865
  'Think': 38687
  'ĠThinking': 52289
  'Ġthinkers': 68022
  'Ġrethink': 75655
  'thinking': 82260
  'thinkable': 90103
  'ĠTHINK': 92119
  'Thinking': 93945
  'Ġunthinkable': 95503
  'Ġthinker': 97536
  '-thinking': 98951
  '<think>': 151667
  '</think>': 151668

Vocabulary size: 151669


In [5]:
# First, let's make sure IPython magic commands are available
from IPython import get_ipython
if get_ipython() is not None:
    # Enable IPython magic commands
    print("IPython environment detected - magic commands should work")
else:
    print("Not in IPython environment")


IPython environment detected - magic commands should work


In [6]:
# %%writefile test_model.py
import os
from transformers import AutoModelForCausalLM
import torch

# Note: model_name and tokenizer should be defined in a previous cell
model_name = "/home/rishabhtiwari/hf_cache/Qwen--Qwen3-30B-A3B-Base"

# Set cache directory
cache_dir = "/home/rishabhtiwari/hf_cache"
os.makedirs(cache_dir, exist_ok=True)

# Download and load the model
print("Loading model (this may take a while for first download)...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    cache_dir=cache_dir,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True
)

print(f"Model loaded successfully!")
print(f"Model device: {next(model.parameters()).device}")

# Sample math question
math_question = """A train travels 240 miles in 3 hours. If it maintains the same speed, how long will it take to travel 400 miles?"""

# You'll need to import tokenizer and define it properly in your main script
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Prepare the input
messages = [
    {
        "role": "system",
        "content": "Please reason step by step, and put your final answer within \\boxed{{}}."
    },
    {
        "role": "user",
        "content": math_question
    }
]

# Tokenize the input
input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(input_text)
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)

print(f"\nInput question: {math_question}")
print(f"\nGenerating response...")

# Generate response
with torch.no_grad():
    output = model.generate(
        input_ids,
        max_new_tokens=512,
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

# Decode the response 
response = tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True)
print(f"\nModel response:\n{response}")


Loading model (this may take a while for first download)...


Loading checkpoint shards: 100%|██████████| 16/16 [00:07<00:00,  2.10it/s]


Model loaded successfully!
Model device: cpu
<|im_start|>system
Please reason step by step, and put your final answer within \boxed{{}}<|im_end|>
<|im_start|>user
A train travels 240 miles in 3 hours. If it maintains the same speed, how long will it take to travel 400 miles?<|im_end|>
<|im_start|>assistant


Input question: A train travels 240 miles in 3 hours. If it maintains the same speed, how long will it take to travel 400 miles?

Generating response...
forward top_k: 8, iteration: 100
forward top_k: 12, iteration: 101

Model response:
To solve this problem, we can use the formula: distance = speed x time. We know that the train travels 240 miles in 3 hours, so we can use this information to find the speed of the train. 

Speed = distance / time
Speed = 240 miles / 3 hours
Speed = 80 miles per hour

Now that we know the speed of the train, we can use it to find out how long it will take to travel 400 miles. 

Time = distance / speed
Time = 400 miles / 80 miles per hour
Time = 5 ho

In [12]:
response = tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=False)
print(f"\nModel response:\n{response}")



Model response:
Let's solve this step by step.

---

### **Step 1: Find the speed of the train**

We know that:

- Distance = 240 miles  
- Time = 3 hours  

Speed is calculated using the formula:

$$
\text{Speed} = \frac{\text{Distance}}{\text{Time}}
$$

$$
\text{Speed} = \frac{240 \text{ miles}}{3 \text{ hours}} = 80 \text{ miles per hour}
$$

So, the train is traveling at **80 mph**.

---

### **Step 2: Use the speed to find time for 400 miles**

Now, we want to find how long it will take to travel **400 miles** at the same speed (80 mph).

Use the formula:

$$
\text{Time} = \frac{\text{Distance}}{\text{Speed}}
$$

$$
\text{Time} = \frac{400 \text{ miles}}{80 \text{ mph}} = 5 \text{ hours}
$$

---

### ✅ **Final Answer:**

$$
\boxed{5} \text{ hours}
$$

It will take **5 hours** to travel 400 miles at the same speed.<|im_end|>
