In [None]:
# Import necessary libraries
import time
import psutil
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Monitor system resources
def monitor_memory(threshold: float = 80.0) -> None:
    """Print the current system memory utilisation and warn when it is high.

    Args:
        threshold: Used-memory percentage above which an extra warning line
            is printed. Defaults to 80, the value that used to be hard-coded.
    """
    used_percent = psutil.virtual_memory().percent
    print(f"Memory usage: {used_percent}%")
    if used_percent > threshold:
        print("⚠️ Warning: High memory usage!")

def monitor_disk(path: str = '/', threshold: float = 90.0) -> None:
    """Print the disk utilisation of a mount point and warn when it is high.

    Args:
        path: Filesystem path whose containing partition is inspected.
            Defaults to '/', the location that used to be hard-coded.
        threshold: Used-disk percentage above which an extra warning line
            is printed. Defaults to 90, the value that used to be hard-coded.
    """
    used_percent = psutil.disk_usage(path).percent
    print(f"Disk usage: {used_percent}%")
    if used_percent > threshold:
        print("⚠️ Warning: High disk usage!")


In [None]:
# Shared instruction preamble placed in front of every test question.
# The triple-quoted literal keeps its leading and trailing newline.
BASE_TEST_PROMPT = '''
Act as a Math teacher for primary school. Students will ask you some basic Math questions.
Try to answer the questions by breaking down the problem step by step and finally give the answer.
Remember that your target audience is 6 to 10 years old, so answer in a very basic manner.
'''

def get_full_prompt(question: str) -> str:
    """Build the complete prompt text for a single question.

    The result is the base preamble, a blank line, a ``Question:`` line
    holding ``question``, and a trailing ``Answer:`` cue for the model.
    """
    question_line = "Question: {}".format(question)
    return "\n".join([BASE_TEST_PROMPT, "", question_line, "Answer:"])


In [None]:
# Sample word problems used to exercise the prompt template
QUESTIONS = [
    "What is 15 + 7? What if you take away 5 from the answer?",
    "Lily has 18 apples. She gives 7 to her friend. How many apples does she have left?",
    "If you have 3 baskets with 4 apples in each, how many apples do you have altogether?",
]

# One fully formatted prompt per sample question, in the same order as QUESTIONS
ALL_PROMPTS = list(map(get_full_prompt, QUESTIONS))


In [None]:
# Fetch the Qwen checkpoint and its tokenizer from the Hugging Face Hub
model_name = "Qwen/Qwen2.5-Math-1.5B-Instruct"

# Prefer the GPU whenever PyTorch can see one
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print("Loading the model...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    # Automatic placement is only requested on GPU; on CPU no device map is passed
    device_map="auto" if device == "cuda" else None,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
print("Model loaded successfully!")


Loading the model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/656 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.32k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Model loaded successfully!


In [None]:
# System prompt used when the caller does not supply one.
DEFAULT_SYSTEM_PROMPT = "Please reason step by step, and put your final answer within \\boxed{}."


def chat_with_qwen(prompt, system_prompt=DEFAULT_SYSTEM_PROMPT, max_new_tokens=512):
    """Send one user message to the loaded model and return its reply text.

    Relies on the notebook-level ``model`` and ``tokenizer`` objects.

    Args:
        prompt: Text placed in the ``user`` turn of the conversation.
        system_prompt: Text placed in the ``system`` turn. Defaults to the
            step-by-step / ``\\boxed{}`` instruction that was previously
            hard-coded in this function.
        max_new_tokens: Upper bound on the number of generated tokens.
            Defaults to 512, the previously hard-coded limit.

    Returns:
        The decoded completion for the single prompt, with the prompt tokens
        and special tokens removed.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    # Render the conversation into the model's chat format, ending with the
    # marker that asks the assistant to start its turn.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Move the inputs to wherever the model actually lives, instead of
    # trusting a separate global to stay in sync with the model's placement.
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Fall back to the EOS token if the tokenizer defines no pad token, so
    # generate() is never handed pad_token_id=None.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=max_new_tokens,
        pad_token_id=pad_token_id,
    )

    # generate() returns prompt + completion for each row; keep only the
    # tokens that come after that row's prompt.
    completion_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    return tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]



In [None]:
def demo_live_prompt():
    """
    Ask the user for a single math question and print the model's answer.

    Surrounding whitespace is stripped from the input. If nothing is left
    (for example the user just pressed Enter), the model is not called and a
    short notice is printed instead.
    """
    print("\n=== Live Demo ===")
    user_question = input("Enter your math question: ").strip()

    # Do not spend a generation call on an empty prompt.
    if not user_question:
        print("No question entered; skipping generation.")
        return

    response = chat_with_qwen(user_question)

    print("\n=== Output ===")
    print(f"**Question:** {user_question}")
    print(f"**Answer:** {response}")



In [None]:
# Entry point: report resource usage, then start the interactive demo
if __name__ == "__main__":
    print("Monitoring system resources before starting...")
    # Memory first, then disk
    for report_usage in (monitor_memory, monitor_disk):
        report_usage()
    print("Setup complete! Ready for live demo.\n")

    # Hand control to the interactive question/answer prompt
    demo_live_prompt()


Monitoring system resources before starting...
Memory usage: 18.1%
Disk usage: 31.5%
Setup complete! Ready for live demo.


=== Live Demo ===
