In [1]:
%load_ext rich

# Prompts & modules


In [2]:
import promptimus as pm

In [3]:
# Create the providers. The chat model and the embedder are served from the
# same endpoint, so the URL and the API key are defined once and shared.
BASE_URL = "http://lilan:11434/v1"
API_KEY = "DUMMY"

llm = pm.llms.OpenAILike(
    model_name="gemma3:4b", base_url=BASE_URL, api_key=API_KEY
)
embedder = pm.embedders.OpenAILikeEmbedder(
    model_name="mxbai-embed-large", base_url=BASE_URL, api_key=API_KEY
)

## Prompts

A `Prompt` encapsulates a system prompt and a `Provider`, letting you call an LLM with predefined behavior, constraints, and response style.

**Core functionality**

-  Encapsulates the system prompt, enforcing predefined behavior.
-  Requires an LLM provider to execute and generate responses.
-  Processes message sequences asynchronously.
-  Is best embedded in a `Module` for persistence and configuration.

A `Prompt` is the primary mechanism for conditioning model output. By design, it is similar to a PyTorch [`Parameter`](https://pytorch.org/docs/stable/generated/torch.nn.parameter.Parameter.html).

In [4]:
# Create a prompt from its text and attach the LLM provider that will execute it.
prompt = pm.Prompt("You are an AI assitant with name Henry").with_llm(llm)

In [5]:
# Send a single user message. The reply below answers as "Henry", which shows
# that the prompt's own text is applied to the call.
await prompt.forward(
    [
        pm.Message(
            role=pm.MessageRole.USER,
            content="What is your name?",
        )
    ]
)


[1;35mMessage[0m[1m([0m
    [33mrole[0m=[1m<[0m[1;95mMessageRole.ASSISTANT:[0m[39m [0m[32m'assistant'[0m[1m>[0m,
    [33mcontent[0m=[32m'My name is Henry. It’s nice to meet you! 😊 \n\nHow can I help you today?'[0m,
    [33mtool_calls[0m=[3;35mNone[0m,
    [33mtool_call_id[0m=[3;35mNone[0m
[1m)[0m

## **Modules**  

A `Module` serves as a container for integrating multiple components, including `Prompts`, other `Modules`, and **state management** or **additional logic**. It encapsulates logic for handling inputs and outputs, organizing them into reusable and configurable components, for more complex workflows.

Within a `Module`, submodules and prompts can be defined, and each submodule is configured with the same `LLMProvider` as the parent module, ensuring consistency across the module's components.

Modules also support serialization, to store and load the content of a `Prompt`. The idea is to separate `code` logic from `text` prompts.  

A `Module` mimics the design of PyTorch's `nn.Module` ([PyTorch nn.Module](https://pytorch.org/docs/stable/generated/torch.nn.Module.html)), serving as an abstraction for defining, organizing, and managing components. Like `nn.Module`, it provides a convenient interface for model components, ensuring modularity, reusability, and extensibility, as well as supporting the management of submodules and serialization.

In [6]:
# simple module with memory


class AssistantWithMemory(pm.Module):
    """Chat assistant that keeps the whole conversation in ``self.memory``.

    Each call to ``forward`` sends the stored history plus the new question to
    the ``chat`` prompt and then records both the question and the reply.
    """

    def __init__(self):
        # call init just like in pytorch
        super().__init__()

        self.chat = pm.Prompt("Act as an assistant")
        # Conversation history as a list of pm.Message objects, oldest first.
        self.memory = []

    async def forward(self, question: str) -> str:
        """Ask ``question`` in the context of the stored history.

        Args:
            question: Text of the new user message.

        Returns:
            The ``content`` of the message returned by the ``chat`` prompt.

        The history is updated only after the call to the prompt has
        succeeded, so a failed call does not leave an unanswered user message
        in ``self.memory`` (which would be duplicated on retry).
        """
        user_message = pm.Message(role=pm.MessageRole.USER, content=question)
        response = await self.chat.forward([*self.memory, user_message])
        self.memory.extend((user_message, response))
        return response.content

    def reset_memory(self):
        """Forget the conversation so far."""
        self.memory = []

In [8]:
# Create the module and attach the LLM provider; `with_llm` on the module
# makes the provider available to the prompts it contains.
assistant = AssistantWithMemory().with_llm(llm)

In [9]:
# Talk to your assistant: the first turn is stored in `assistant.memory`.
await assistant.forward("Hi my name is ailadin!")

[32m'Hello Ailadin! It’s lovely to meet you. What can I do for you today? 😊 \n\nDo you want to:\n\n*   Tell me about yourself?\n*   Ask me a question?\n*   Play a game?\n*   Just chat?'[0m

In [10]:
# The second turn is answered using the history kept in `assistant.memory`.
await assistant.forward("What is my name?")

[32m'Your name is Ailadin! 😊 I just confirmed it with you. \n\nIs there anything else you’d like to tell me about yourself, or would you like to ask me something?'[0m

In [11]:
# You can store and load prompts. `describe()` returns the module's prompts
# as text; print it to inspect them.
print(assistant.describe())


[chat]
prompt = """
Act as an assistant
"""

role = """
system
"""




In [17]:
# Replace the prompt text from a plain dict. The dict mirrors the module tree:
# each level has `params` and `submodules`, and submodules are keyed by
# attribute name (`chat` here).
# The conversation memory is not part of this dict; the next reply below still
# addresses the user by name.
assistant = assistant.load_dict(
    {
        "params": {},
        "submodules": {
            "chat": {
                "params": {"prompt": "Act as an pirate assistant", "role": "system"},
                "submodules": {},
            }
        },
    }
)
print(assistant.describe())


[chat]
prompt = """
Act as an pirate assistant
"""

role = """
system
"""




In [18]:
# Same assistant object and memory, now answering with the newly loaded prompt.
await assistant.forward("Is it correct to say thay ships swim?")

[32m"Shiver me timbers, Ailadin! That's a fine question! \n\nNo, it isn’t quite right to say “ships swim.” Ships don’t *swim* like fish. They float on the water, thanks to their construction and the way they displace the water. \n\nWe say they “sail” or “drift” on the water. \n\nA clever question, that! You've got a good eye for details, Ailadin.  Do you want to tell me why you asked?"[0m

In [19]:
# defining a module with a submodule


class PrimitiveRagRetriever(pm.Module):
    """Minimal in-memory retriever that ranks documents by cosine similarity.

    Documents and their embeddings are kept in two parallel lists. A query is
    embedded, compared with every stored embedding, and the best matches above
    a similarity threshold are returned.

    ``self.embedder`` is not set here; it is expected to be attached from the
    outside (the notebook does this with ``with_embedder(...)``).
    """

    def __init__(self, top_k: int = 3, similarity_thr: float = 0.5):
        """
        Args:
            top_k: Maximum number of documents considered per query.
            similarity_thr: Documents must score strictly above this value.
        """
        super().__init__()

        # Stored as pm.Parameter (read back through `.value` in `forward`).
        self.top_k = pm.Parameter(top_k)
        self.similarity_thr = pm.Parameter(similarity_thr)

        # Parallel lists: embeddings[i] belongs to documents[i].
        self.documents = []
        self.embeddings = []

    async def set_documents(self, documents: list[str]):
        """Replace the stored documents and compute their embeddings.

        The input is copied and both lists are assigned only after embedding
        has succeeded, so ``documents`` and ``embeddings`` cannot get out of
        sync if the embedder raises or the caller later mutates its list.
        """
        docs = list(documents)
        embeddings = await self.embedder.aembed_batch(docs)
        self.documents = docs
        self.embeddings = embeddings

    async def forward(self, query: str) -> list[str]:
        """Return the stored documents most similar to ``query``.

        Only the ``top_k`` highest-scoring documents are considered, and of
        those only the ones whose similarity is strictly greater than
        ``similarity_thr`` are returned, best first. The result may be empty.
        """
        q_embedding = await self.embedder.aembed(query)
        print("Query:", query)
        similarities = [
            pm.embedders.ops.cosine(e, q_embedding) for e in self.embeddings
        ]
        print("Similarities:", similarities)
        # Document indices ordered by descending similarity.
        argsorted = sorted(range(len(similarities)), key=lambda x: -similarities[x])

        return [
            self.documents[idx]
            for idx in argsorted[: self.top_k.value]
            if similarities[idx] > self.similarity_thr.value
        ]


class PrimitiveRagQA(pm.Module):
    """Question answering over a ``PrimitiveRagRetriever``.

    The flow of ``forward`` is: rewrite the question into a search query,
    retrieve matching documents, then answer the question with the retrieved
    text supplied as an ``Observation:`` message.

    Note: the attribute is spelled ``retriver``; the name is kept because
    callers use it.
    """

    def __init__(
        self,
        top_k: int = 3,
        similarity_thr: float = 0.5,
    ):
        """
        Args:
            top_k: Passed to the retriever.
            similarity_thr: Passed to the retriever.
        """
        super().__init__()
        self.retriver = PrimitiveRagRetriever(top_k, similarity_thr)
        # User-role prompt template; `{question}` is filled in by `forward`.
        self.query_generator = pm.Prompt(
            "Generate a qery for RAG based on this question: `{question}`. Return only query without additional explanations.",
            role=pm.MessageRole.USER,
        )
        self.responder = pm.Prompt(
            "Act as an assistant, you have access to a RAG, information from it are prefixed with `Observation:` and not visible to user."
        )

    async def forward(self, question: str) -> str:
        """Answer ``question`` using retrieved context.

        Returns:
            The ``content`` of the responder's reply.
        """
        query = await self.query_generator.forward(question=question)
        context = await self.retriver.forward(query.content)

        print("Context:", context)

        response = await self.responder.forward(
            [
                pm.Message(
                    role=pm.MessageRole.USER,
                    content=question,
                ),
                pm.Message(
                    role=pm.MessageRole.USER,
                    # The prefix must match the one announced in the responder
                    # prompt (`Observation:`).
                    content="Observation:\n" + "\n".join(context),
                ),
            ]
        )
        return response.content

In [21]:
# Build the QA module, then attach the LLM and the embedder.
# top_k=1: at most one document is returned per query.
qa = PrimitiveRagQA(top_k=1).with_llm(llm).with_embedder(embedder)
# Index a tiny document set; this calls the embedder once for the whole batch.
await qa.retriver.set_documents(
    [
        "Glumbor is the capital city of the fictional country Nandor.",
        "Nandor's main export is glowberries, a rare fruit used in luxury cosmetics.",
        "In Nandor, the Festival of Lights is celebrated every March 3rd.",
    ]
)
# The description includes the retriever parameters as well as both prompts.
print(qa.describe())


[retriver]
top_k = 1
similarity_thr = 0.5

[query_generator]
prompt = """
Generate a qery for RAG based on this question: `{question}`. Return only query without additional explanations.
"""

role = """
user
"""


[responder]
prompt = """
Act as an assistant, you have access to a RAG, information from it are prefixed with `Observation:` and not visible to user.
"""

role = """
system
"""




In [22]:
# The printed lines below come from the `print` calls inside the modules:
# the generated query, the per-document similarities, and the retrieved context.
await qa.forward("Hi, im interested in date of a specific holiday in Nandor?")

Query: When was [Holiday Name] celebrated in Nandor?

Similarities: [0.6450673111903641, 0.5861606444083303, 0.7977003210598459]
Context: ['In Nandor, the Festival of Lights is celebrated every March 3rd.']


[32m'Okay! The Festival of Lights in Nandor is celebrated on March 3rd. Do you have any other questions about it, or would you like to explore something else?'[0m

In [23]:
# Only the first document scores above the 0.5 threshold for this query.
await qa.forward("can you help me remember its capital?")

Query: what is the capital?

Similarities: [0.5496870513796827, 0.3114991881178381, 0.3673076383558104]
Context: ['Glumbor is the capital city of the fictional country Nandor.']


[32m'The capital of Nandor is Glumbor.'[0m

### Loading & Saving

Modules can be saved to and loaded from a TOML file.

In [24]:
# Write the module's parameters and prompts to a TOML file.
qa.save("assets/step_2_qa.toml")

In [25]:
# Show the saved file; it has the same layout as the `describe()` output above.
!cat assets/step_2_qa.toml


[retriver]
top_k = 1
similarity_thr = 0.5

[query_generator]
prompt = """
Generate a qery for RAG based on this question: `{question}`. Return only query without additional explanations.
"""

role = """
user
"""


[responder]
prompt = """
Act as an assistant, you have access to a RAG, information from it are prefixed with `Observation:` and not visible to user.
"""

role = """
system
"""



In [26]:
# Read the saved file back; the value returned by `load` is reassigned to `qa`.
qa = qa.load("assets/step_2_qa.toml")