In [1]:
import os
import sys

from pydantic import BaseModel
import numpy as np

from ragu.llm.llm import CachedOpenAI

In [2]:
from ragu.common.logger import logger

# Mirror log records to stderr at DEBUG level.
#
# `logger.add` returns a handler id. Keeping it in a variable (instead of
# leaving the call as the cell's last expression) stops the id from being
# echoed as cell output, and lets a re-run of this cell replace the sink
# rather than stack a second one that would print every record twice.
# NOTE(review): assumes a loguru-style logger where `remove(id)` raises
# ValueError for an unknown id — confirm against ragu.common.logger.
try:
    logger.remove(_debug_sink_id)
except (NameError, ValueError):
    # NameError: first run, nothing registered by this cell yet.
    # ValueError: the sink was already removed elsewhere.
    pass
_debug_sink_id = logger.add(sys.stderr, level="DEBUG")

2

In [2]:
# Endpoint and credentials come from the environment so no secret is stored
# in the notebook; a missing variable raises KeyError right here.
vsegpt_base_url = os.environ['VSEGPT_BASE_URL']
vsegpt_api_key = os.environ['VSEGPT_KEY']

# Client options, kept together so they are easy to tweak in one place.
# NOTE(review): the units of `rate_min_delay` / `retry_times_sec` are
# presumably seconds — confirm against CachedOpenAI.
llm_options = {
    'base_url': vsegpt_base_url,
    'api_key': vsegpt_api_key,
    'cache': 'cache_dir/',
    'rate_min_delay': 1,
    'retry_times_sec': (1, 2, 4),
}

llm = CachedOpenAI(**llm_options)

In [5]:
# Demo schema for a structured reply; it is passed to `llm.chat_completion`
# as `output_schema`, and the result is dumped with `model_dump_json`.
#
# Documented with `#` comments on purpose: pydantic copies a model docstring
# into the generated JSON schema's description, so adding one could change
# what the model is asked for.
# NOTE(review): confirm whether ragu forwards that schema to the provider.
class Response(BaseModel):
    answer: str  # free-form text reply
    time: str  # plain string; no datetime parsing/validation is applied
    jokes: list[str]  # list of strings
    chipi_chipi_chapa_chapa: list[str]  # deliberately nonsensical field name, also a list of strings

answer = await llm.chat_completion(
    'openai/gpt-3.5-turbo',
    [{'role': 'user', 'content': 'Hi! Fill the fields!'}],
    output_schema=Response
)
print(answer.model_dump_json(indent=2))

{
  "answer": "Sure! What information would you like me to fill in?",
  "time": "2023-10-31T12:00:00Z",
  "jokes": [
    "Why did the scarecrow win an award? Because he was outstanding in his field!",
    "What do you call fake spaghetti? An impasta!",
    "Why don’t scientists trust atoms? Because they make up everything!"
  ],
  "chipi_chipi_chapa_chapa": [
    "Chipi chipi, now let's get started!",
    "Chapa chapa, are you ready for some fun?",
    "Chipi chipi, here comes the excitement!"
  ]
}


In [6]:
import asyncio

embeddings = await asyncio.gather(*[
    llm.embed_text('emb-qwen/qwen3-embedding-8b', f'Pepe watafa{i}')
    for i in range(3)
])
print(np.array(embeddings))

[[ 0.00958252  0.01318359  0.01171875 ...  0.00283813  0.00524902
  -0.00170135]
 [ 0.00830078  0.01177979  0.01507568 ...  0.00154877  0.00328064
   0.00017452]
 [ 0.01306152  0.0123291   0.01495361 ...  0.00396729  0.00613403
  -0.00294495]]
