In [1]:
! pip install openai



In [2]:
from openai import OpenAI

In [3]:
def init_llm(base_url: str = "http://127.0.0.1:8000/v1", api_key: str = "empty", model: "str | None" = None):
    """Create an OpenAI-compatible client for a vLLM server.

    Every argument is optional; the defaults point at a local server.

    Args:
        base_url (str): URL where the vLLM server is running.
        api_key (str): API key handed to the client (defaults to the
            placeholder "empty").
        model (str | None): Model served using vLLM
            (e.g., Qwen/Qwen2.5-1.5B-Instruct). It is returned unchanged, so
            it stays ``None`` when the caller does not supply a name.

    Returns:
        tuple: ``(llm, model)`` -- the ``OpenAI`` client and the model name
        exactly as it was passed in.
    """
    llm = OpenAI(base_url=base_url, api_key=api_key)
    return llm, model
        

def invoke(llm, model, messages: list, seed: int = 42) -> str:
    """Get the complete (non-streamed) response for a chat conversation.

    Args:
        llm: Client object exposing ``chat.completions.create``.
        model: Model name forwarded to the completion request.
        messages (list): List of messages in the chat format.
        seed (int): Sampling seed forwarded to the request. Defaults to 42,
            the value that was previously hard-coded.

    Returns:
        str: Content of the first choice's message, or an empty string when
        the message carries no content.

    Raises:
        IndexError: If the response contains no choices.
    """
    response = llm.chat.completions.create(
        model=model,
        messages=messages,
        seed=seed,
    )
    content = response.choices[0].message.content
    # The message content may be None; honour the declared ``str`` return type.
    return content if content is not None else ""

def stream(llm, model, messages: list, seed: int = 42):
    """Stream the response piece by piece.

    Args:
        llm: Client object exposing ``chat.completions.create``.
        model: Model name forwarded to the completion request.
        messages (list): List of messages in the chat format.
        seed (int): Sampling seed forwarded to the request. Defaults to 42,
            the value that was previously hard-coded.

    Yields:
        str: Each non-empty content fragment, in the order it is received.
    """
    # Named `chunks` so the local does not shadow this function's own name.
    chunks = llm.chat.completions.create(
        model=model,
        messages=messages,
        stream=True,
        seed=seed,
    )
    for chunk in chunks:
        # A chunk may arrive with an empty `choices` list (for example a
        # usage-only chunk); indexing it would raise IndexError, so skip it.
        if not chunk.choices:
            continue
        # Fragments that are None or "" carry no text and are not yielded.
        if content := chunk.choices[0].delta.content:
            yield content

In [4]:
# Smoke-test the vLLM server with one non-streaming chat request.
# No model name is passed, so `model` comes back as None from init_llm.
# NOTE(review): confirm the server accepts a request without a model name.
llm, model = init_llm(
    base_url="http://127.0.0.1:8000/v1",
    api_key="empty",
)
messages = [
    {"role": "user", "content": "Explain quantum computing simply."},
]
response = invoke(llm, model, messages)


In [5]:
# Bare expression: the notebook displays the complete reply returned by invoke().
response

'Quantum computing refers to the science and technology of manipulating quantum systems to solve complex problems that are currently out of reach for classical computing technologies. Essentially, it is a branch of computing that involves using the properties of quantum mechanics to perform computations using qubits, which are quantum bits (or "bits").\n\nOne of the distinct features of quantum computing is that it operates on the principles of quantum mechanics, rather than classical mechanics, such as Newtonian laws of motion. This quantum weirdness means that mathematical problems that are traditionally considered intractable by classical physics can be solved using quantum mechanics, and indeed, quantum computing has shown considerable promise for large-scale problems in fields such as cryptography, drug design, and optimization.\n\nIn terms of application scenarios, quantum computing could potentially play a small but valuable role in scientific and financial applications such as 

In [6]:
# Stream the same prompt: echo each fragment as soon as it arrives while
# rebuilding the full reply in `response` (this replaces the earlier value).
response = ""
for chunk in stream(llm, model, messages):
    print(chunk, end="", flush=True)
    response = response + chunk

Quantum computing refers to the science and technology of manipulating quantum systems to solve complex problems that are currently out of reach for classical computing technologies. Essentially, it is a branch of computing that involves using the properties of quantum mechanics to perform computations using qubits, which are quantum bits (or "bits").

One of the distinct features of quantum computing is that it operates on the principles of quantum mechanics, rather than classical mechanics, such as Newtonian laws of motion. This quantum weirdness means that mathematical problems that are traditionally considered intractable by classical physics can be solved using quantum mechanics, and indeed, quantum computing has shown considerable promise for large-scale problems in fields such as cryptography, drug design, and optimization.

In terms of application scenarios, quantum computing could potentially play a small but valuable role in scientific and financial applications such as medic