### This notebook showcases how to use a HyDE Rewrite IO Processor

* HyDERewriteIOProcessor only considers the last user input
* It then asks the backend to generate an answer for the user input
* Finally, it enriches the last user input by appending the generated answer

The modified user input in essence represents "Query + Answer", which can lead to better retrieval.

In [None]:
from granite_io.backend.vllm_server import LocalVLLMServer
from granite_io.io.granite_3_3.input_processors.granite_3_3_input_processor import (
    Granite3Point3Inputs,
)

In [None]:
from granite_io.io.hyde_rewrite import HyDERewriteIOProcessor

In [None]:
# Identifier of the model that the local vLLM server below is created with.
model_name = "ibm-granite/granite-3.3-8b-instruct"

In [None]:
# Create the local vLLM server object for `model_name`.
# NOTE(review): presumably this launches the server in the background, since the
# next cell waits for startup -- confirm against LocalVLLMServer.
server = LocalVLLMServer(model_name)

In [None]:
# Wait for the server to finish starting before using it.
# NOTE(review): 200 is presumably a timeout in seconds -- confirm against
# LocalVLLMServer.wait_for_startup.
server.wait_for_startup(200)

In [None]:
# Obtain a backend object from the server; it is passed to the IO processor below.
backend = server.make_backend()

In [None]:
# Create an example chat completion with a short conversation.
# The final user turn ("But is he more likely to get fleas ...") is the one the
# HyDE rewrite processor works on, since it only considers the last user input.
chat_input = Granite3Point3Inputs.model_validate(
    {
        "messages": [
            {"role": "assistant", "content": "Welcome to pet questions!"},
            {
                "role": "user",
                "content": "I have two pets, a dog named Rex and a cat named Lucy.",
            },
            {
                "role": "assistant",
                "content": "Great, what would you like to share about them?",
            },
            {
                "role": "user",
                # The cat is called Lucy everywhere else in this conversation;
                # keep the name consistent so the example is coherent.
                "content": "Rex spends a lot of time in the backyard and outdoors, "
                "and Lucy is always inside.",
            },
            {
                "role": "assistant",
                "content": "Sounds good! Rex must love exploring outside, while Lucy "
                "probably enjoys her cozy indoor life.",
            },
            {
                "role": "user",
                "content": "But is he more likely to get fleas because of that?",
            },
        ],
        # Temperature 0.0 is intended to keep the generated output as repeatable
        # as possible across runs of this notebook.
        "generate_inputs": {"temperature": 0.0},
    }
)
# Display the validated input object as the cell output.
chat_input

In [None]:
# Build the HyDE rewrite IO processor on top of the backend created above.
io_proc = HyDERewriteIOProcessor(backend)

In [None]:
chat_result = await io_proc.acreate_chat_completion(chat_input)
print(chat_result.results[0].next_message.model_dump_json(indent=2))

In [None]:
# Display the complete result object (the previous cell printed only the first
# result's message).
chat_result

In [None]:
# Free up GPU resources
# The name check keeps this cell from raising a NameError when it is run before
# the cell that creates `server` (for example on a fresh kernel).
if "server" in locals():
    server.shutdown()