In [1]:
%load_ext rich

# LLM Provider
A provider is a class that implements `ProviderProtocol`, serving as an interface to interact with a large language model (LLM). It processes user queries and returns model-generated responses.

```python
class ProviderProtocol(Protocol):
    async def achat(self, history: list[Message]) -> Message: ...
```

Expected Behavior

-  Implements `achat(self, history: list[Message]) -> Message`
-  Accepts a conversation history (list of `Message` objects)
-  Returns a single assistant-generated response (`Message`)

A provider facilitates structured communication with an LLM, ensuring consistency in message handling and response generation.

In [2]:
from promptimus import Message, MessageRole
from promptimus.llms import OpenAILike

In [3]:
# Create a provider.
#
# Do not forget to export OPENAI_API_KEY with your token, or pass the token
# explicitly through the `api_key` argument (as done here with a placeholder).
provider = OpenAILike(
    model_name="gemma3:4b",
    base_url="http://lilan:11434/v1",
    api_key="DUMMY",
)

In [4]:
# using a provider with single message
await provider.achat(
    [
        Message(role=MessageRole.USER, content="Hi, who are you?"),
    ]
)


[1;35mMessage[0m[1m([0m
    [33mrole[0m=[1m<[0m[1;95mMessageRole.ASSISTANT:[0m[39m [0m[32m'assistant'[0m[1m>[0m,
    [33mcontent[0m=[32m"Hi[0m[32m there! I'm Gemma, a large language model created by the Gemma team at Google DeepMind. I'm an open-weights model, which means I'm widely available for public use! \n\nI can take text and images as inputs and generate text as output. \n\nNice to meet you!"[0m,
    [33mtool_calls[0m=[3;35mNone[0m,
    [33mtool_call_id[0m=[3;35mNone[0m
[1m)[0m

In [5]:
# using provider with multiple messages
await provider.achat(
    [
        Message(role=MessageRole.SYSTEM, content="Your name is Mark."),
        Message(role=MessageRole.USER, content="Hi, what is your name?"),
    ]
)


[1;35mMessage[0m[1m([0m
    [33mrole[0m=[1m<[0m[1;95mMessageRole.ASSISTANT:[0m[39m [0m[32m'assistant'[0m[1m>[0m,
    [33mcontent[0m=[32m'Hi there! You can call me Mark. Nice to meet you! 😊 \n\nHow can I help you today?'[0m,
    [33mtool_calls[0m=[3;35mNone[0m,
    [33mtool_call_id[0m=[3;35mNone[0m
[1m)[0m

## OpenAI-like providers and rate limits

In [6]:
from tqdm.auto import tqdm
import asyncio

In [7]:
# Provider used for the rate-limit demo below. No base_url/api_key is passed,
# so the client's defaults apply — presumably the key is read from
# OPENAI_API_KEY; confirm against OpenAILike.
# NOTE(review): max_concurrency=50 looks like a cap on simultaneous requests — confirm.
openai = OpenAILike(model_name="gpt-4.1-nano", max_concurrency=50)

In [8]:
for task in tqdm.as_completed(
    [
        openai.achat(
            [
                Message(role=MessageRole.USER, content="Hi, who are you?"),
            ]
        )
        for _ in range(1_000)
    ]
):
    await task

  0%|          | 0/1000 [00:00<?, ?it/s]

# Embedder
An embedder is a class that implements `EmbedderProtocol`, serving as an interface to generate text embeddings.

```python
Embedding = list[float]


class EmbedderProtocol(Protocol):
    async def aembed_batch(self, texts: list[str], **kwargs: Any) -> list[Embedding]: ...
    async def aembed(self, text: str, **kwargs: Any) -> Embedding: ...
```

In [1]:
import promptimus as pm

In [2]:
# Create an embedder; the api_key value here is a placeholder.
embedder = pm.embedders.OpenAILikeEmbedder(
    model_name="mxbai-embed-large",
    base_url="http://lilan:11434/v1",
    api_key="DUMMY",
)

In [3]:
embedding = await embedder.aembed("Hi")
len(embedding), embedding[:3]

(1024, [0.049403064, 0.012764112, -0.006060004])

In [4]:
embeddings = await embedder.aembed_batch(["Hi", "How are you?"])

len(embeddings)

2