Skip to content

Examples Python Development

Mike edited this page May 28, 2026 · 2 revisions

Development

Native OpenAI-compatible quick dev

from openai import OpenAI
import xlocllm

# Declare the LLM unit used below: Qwen-3.5-0.8b with quant="q4".
llm = xlocllm.unit("LLM", "Qwen-3.5-0.8b", quant="q4")

with xlocllm.runtime([llm], mode="native") as rt:
    rt.run()

    # Talk to the runtime through the regular OpenAI client by pointing
    # base_url at rt.url.
    openai_client = OpenAI(base_url=rt.url, api_key="xlocllm")
    prompt = "Generate three pytest cases for a date parser."
    completion = openai_client.chat.completions.create(
        model=llm.model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    first_choice = completion.choices[0]
    print(first_choice.message.content)

Local RAG during app development

# Build the pieces: an embedding unit, a RAG object named "dev-docs" that
# receives it, and an LLM unit that is handed that RAG object.
embedder = xlocllm.unit("embedding", "multilingual-e5-small")
dev_docs = xlocllm.rag(emb=embedder, name="dev-docs")
llm = xlocllm.unit("LLM", "Qwen-3.5-0.8b", rag=dev_docs)

with xlocllm.runtime([llm]) as rt:
    rt.run()

    # Add one document under the id "api" after run(), then ask about it.
    documents = ["The dev API uses runtime.chat for quick checks."]
    dev_docs.add(documents, ids=["api"])

    reply = rt.chat("Which API is used for quick checks?")
    print(reply)

Switch modes in one script

# One unit is created under `xlocllm.native`, the other under `xlocllm.web`.
# NOTE(review): presumably a unit is bound to the mode whose `with` block it
# is created in — confirm against the xlocllm documentation.
with xlocllm.native:
    native_llm = xlocllm.unit("LLM", "Qwen-3.5-0.8b")

with xlocllm.web:
    sst2_model_id = "Xenova/distilbert-base-uncased-finetuned-sst-2-english"
    browser_clf = xlocllm.unit("text-classification", sst2_model_id)

Custom sklearn classifier

# sklearn_model must be a fitted estimator.
unit = xlocllm.unit(sklearn_model, type="text-classification", name="local-clf")
with xlocllm.runtime([unit]) as rt:
    rt.run()
    sample_rows = [[0.1, 0.2, 0.3]]  # a single row of three numeric values
    prediction = unit.predict(sample_rows)
    print(prediction)

Clone this wiki locally