Skip to content

Examples Python Development

Mike edited this page May 28, 2026 · 2 revisions

Development

Native OpenAI-compatible quick dev

from openai import OpenAI
import xlocllm

# Quick-dev example: serve a local LLM unit and an embedding unit through one
# runtime in native mode, then talk to it with the stock OpenAI client.
unit = xlocllm.unit("LLM", "Qwen-3.5-0.8b", quant="q4")
unit1 = xlocllm.unit("embedding", "multilingual-e5-small")

rt = xlocllm.runtime([unit, unit1], mode="native")
rt.run()

try:
    # Existing OpenAI-style test code works unchanged once the client's
    # base_url points at the runtime.
    client = OpenAI(base_url=rt.url, api_key="xlocllm")
    answer = client.chat.completions.create(
        model=unit.model,
        messages=[{"role": "user", "content": "Generate three pytest cases for a date parser."}],
        temperature=0,
    )
    print(answer.choices[0].message.content)
finally:
    # Close the runtime even when the request above raises, so a failed
    # experiment does not leave the runtime open.
    rt.close()

Local RAG during app development

# Build the pieces bottom-up: embedding unit -> RAG store -> LLM unit that is
# given that store via rag=.
embedder = xlocllm.unit("embedding", "multilingual-e5-small")
rag = xlocllm.rag(emb=embedder, name="dev-docs")
llm = xlocllm.unit("LLM", "Qwen-3.5-0.8b", rag=rag)

with xlocllm.runtime([llm]) as rt:
    rt.run()

    # Add one document to the store after the runtime has been started.
    documents = ["The dev API uses runtime.chat for quick checks."]
    rag.add(documents, ids=["api"])

    # Ask a question that the document just added can answer.
    reply = rt.chat("Which API is used for quick checks?")
    print(reply)

Switch modes in one script

# Model id for the classifier unit, named once to keep the call below short.
SENTIMENT_MODEL = "Xenova/distilbert-base-uncased-finetuned-sst-2-english"

# NOTE(review): units created inside a mode block presumably bind to that
# mode (native vs. web) -- confirm against the xlocllm documentation.
with xlocllm.native:
    native_llm = xlocllm.unit("LLM", "Qwen-3.5-0.8b")

with xlocllm.web:
    browser_clf = xlocllm.unit("text-classification", SENTIMENT_MODEL)

Custom sklearn classifier

# sklearn_model must be an already-fitted estimator.
clf_unit = xlocllm.unit(sklearn_model, type="text-classification", name="local-clf")

with xlocllm.runtime([clf_unit]) as rt:
    rt.run()

    # A single sample made of three numeric features.
    features = [[0.1, 0.2, 0.3]]
    print(clf_unit.predict(features))

Clone this wiki locally