In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen2.5-0.5B-Instruct"

# Load the model and tokenizer.
# Qwen2.5 uses the `qwen2` architecture that ships with transformers itself,
# so trust_remote_code is deliberately NOT passed: enabling it lets Python
# code downloaded from the Hub repository run on this machine. Add
# trust_remote_code=True back only if model_name is changed to a checkpoint
# that really needs custom model code, and only after reviewing that code.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype='auto',   # take the dtype stored with the checkpoint
    device_map='auto',    # let the library decide where to place the weights
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

print("Model and tokenizer loaded successfully!")

In [None]:
from ipywidgets import Textarea, Button, Output, VBox
from IPython.display import display

# Prompt editor, pre-filled with a sample question the user can overwrite.
input_box = Textarea(
    description='Input:',
    value='Give me a short introduction to large language model.',
    layout=dict(width='600px', height='80px'),
)

# Clicking this button starts a generation run.
generate_button = Button(description='Generate Response')

# Everything printed for the user ends up in this widget.
output_area = Output()

# Stack prompt, button and result top-to-bottom, then render the panel.
ui = VBox(children=[input_box, generate_button, output_area])
display(ui)

# Persona handed to the model as the system message of every request.
SYSTEM_PROMPT = (
    "You are Bernd the Bread. You are a cynical and philosophical bread. "
    "Your answers are short and concise."
)

# Upper bound on the number of tokens produced per click.
MAX_NEW_TOKENS = 512


def generate_response(_):
    """Click handler: answer the prompt in ``input_box`` and show the result.

    Reads the text of ``input_box``, wraps it in a chat conversation together
    with ``SYSTEM_PROMPT``, lets ``model`` generate up to ``MAX_NEW_TOKENS``
    new tokens and prints the decoded answer into ``output_area``.

    The button is disabled while generation is running so that impatient
    clicks do not queue additional runs, and is always re-enabled afterwards.
    A blank prompt is rejected with a hint instead of being sent to the model.

    Args:
        _: Argument supplied by the click event (ignored).

    Returns:
        None. All results are written to ``output_area``.
    """
    # Clear previous output
    output_area.clear_output()

    # Get the user prompt from the text area
    prompt = input_box.value
    if not prompt.strip():
        with output_area:
            print("Please enter a prompt first.")
        return

    generate_button.disabled = True
    try:
        # Do the whole run inside the Output context: besides capturing the
        # prints below, the ipywidgets Output context manager also renders
        # exceptions raised inside it, so a failure during tokenization or
        # generation becomes visible instead of leaving an empty widget.
        with output_area:
            # Set up the messages for the chat template
            messages = [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": prompt},
            ]

            # Apply the model's chat template (as text; tokenized below)
            text = tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
            )

            # Tokenize the input text and move it to the model's device
            model_inputs = tokenizer([text], return_tensors='pt').to(model.device)

            # Generate model output
            generated_ids = model.generate(
                **model_inputs,
                max_new_tokens=MAX_NEW_TOKENS,
            )

            # generate() returns prompt + continuation; drop the prompt part
            # of every sequence so only the newly generated tokens are decoded.
            new_token_ids = [
                output_ids[len(input_ids):]
                for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
            ]

            # Decode the generated tokens (batch of one -> first entry)
            response = tokenizer.batch_decode(new_token_ids, skip_special_tokens=True)[0]

            # Display the response in the output area
            print("Response:")
            print(response)
    finally:
        # Re-enable the button whether generation succeeded or failed.
        generate_button.disabled = False

# Register generate_response as the button's click handler. This runs after
# the UI has already been displayed above, so clicks only have an effect once
# this line has executed.
generate_button.on_click(generate_response)