# Set up the model hosted by Triton+vLLM

In [7]:
from transformers import AutoTokenizer
import requests
import json
import re
import gradio as gr

# Model identifier; its tokenizer supplies the chat template used for prompts
model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# Generate endpoint of the locally served model that receives the POST requests
url = "http://localhost:8000/v2/models/vllm_model/generate"

# HTTP headers sent with every request: the body is JSON
headers = {"Content-Type": "application/json"}

# Load the tokenizer that belongs to the model above
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Define functions

In [9]:
def preprocess_input(input_text):
    """Build the request payload for translating ``input_text``.

    The text is wrapped in a system + user conversation, rendered to a single
    prompt string with the tokenizer's chat template, and bundled with the
    sampling parameters.

    Args:
        input_text: The English text to translate.

    Returns:
        A dict with the rendered prompt under ``"text_input"`` and the
        generation settings under ``"parameters"``.
    """
    messages = [
        {"role": "system", "content": "You are an expert translator who translates text from english to french and only return translated text."},
        {"role": "user", "content": input_text},
    ]

    # tokenize=False returns the rendered prompt as a string, not token ids.
    # add_generation_prompt=True appends the assistant-turn header so the
    # model starts a reply instead of continuing the user's message.
    prompt_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    return {
        "text_input": prompt_text,
        "parameters": {"stream": False, "max_tokens": 512, "temperature": 0.3, "top_p": 0.8, "repetition_penalty": 1.05, "top_k": 40},
    }

def send_infer(data, timeout=120):
    """POST the payload to the inference endpoint and return the response.

    Args:
        data: JSON-serializable request payload.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely, so a stalled server
            would hang the caller. Pass ``None`` to restore that behavior.

    Returns:
        The ``requests.Response`` object; the HTTP status is not checked here.

    Raises:
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    return requests.post(
        url,
        headers=headers,
        data=json.dumps(data),
        timeout=timeout,
    )

def postprocess_output(response):
    """Return the final answer contained in the server response.

    Reads the ``'text_output'`` field of the response's JSON body. When the
    text contains a ``</think>`` marker, only what follows the first marker is
    kept; otherwise the whole text is used. The result is stripped of
    surrounding whitespace.
    """
    raw_text = response.json()['text_output']

    # DOTALL lets the capture group span multiple lines after the marker
    found = re.search(r"</think>\s*(.*)", raw_text, re.DOTALL)
    if found is None:
        # No reasoning section present: the whole output is the answer
        return raw_text.strip()
    return found.group(1).strip()

# Set up the Gradio app

In [13]:
def translate(text):
    """Translate ``text`` by running it through the full request pipeline.

    Builds the payload, sends it to the inference endpoint, and returns the
    cleaned-up answer extracted from the response.
    """
    return postprocess_output(send_infer(preprocess_input(text)))

# One text box in, one text box out, both wired to translate()
demo = gr.Interface(
    title="A funky English to French Translator",
    description="Enter text in the 'original' box to see its 'translated' version.",
    fn=translate,
    inputs=gr.Textbox(label="original"),
    outputs=gr.Textbox(label="translated"),
)

# Start the web UI
demo.launch()

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


