# RAG examples

In [None]:
from os import getenv
from dotenv import load_dotenv

import torch
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer

from travel_agent.rag.search import RagSearch

# Load environment variables from a .env file so QDRANT_URL can be read below.
load_dotenv()

# Qdrant client pointed at the URL stored in the QDRANT_URL environment variable.
# NOTE(review): getenv returns None when QDRANT_URL is unset — confirm how
# QdrantClient behaves with url=None.
qdrant_client = QdrantClient(url=getenv("QDRANT_URL"))

## Dummy retrieval by full query

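Use the full user query as the retrieval input: the places found for it are inserted into a prompt, and the LLM is asked to summarise them.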

In [None]:
# Model id passed to SentenceTransformer (and to MosruPlaces) for embeddings.
EMBEDDING_MODEL = "intfloat/multilingual-e5-large-instruct"
# Model id of the causal chat LLM used for answer generation.
LLM_MODEL = "Qwen/Qwen1.5-4B-Chat"

In [None]:
from travel_agent.qdrant.mosru_places import MosruPlaces

# Upload to qdrant
# NOTE(review): presumably constructing MosruPlaces performs the upload as a
# side effect, using EMBEDDING_MODEL for the vectors — confirm in
# travel_agent.qdrant.mosru_places.
MosruPlaces(EMBEDDING_MODEL)

In [None]:
# Sentence-embedding model; handed to RagSearch below.
embedding_model = SentenceTransformer(EMBEDDING_MODEL)

# Tokenizer and causal-LM weights for LLM_MODEL, loaded in float16 with device
# placement delegated to device_map="auto".
# NOTE(review): `tokenizer` and `llm_model` are not referenced again in the
# visible cells (the generation cell builds its pipeline from the LLM_MODEL
# id) — confirm whether this load is still needed.
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
llm_model = AutoModelForCausalLM.from_pretrained(
    LLM_MODEL, torch_dtype=torch.float16, device_map="auto"
)

# Search helper built from the Qdrant client and the embedding model.
rag = RagSearch(qdrant_client, embedding_model)

In [None]:
# Pick the torch backend to run on: CUDA when available, otherwise Apple MPS,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
elif torch.backends.mps.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cpu"


def format_prompt(query, results) -> str:
    """Build the summarisation prompt for the LLM.

    Args:
        query: The user's question; it is quoted verbatim in the prompt.
        results: Iterable of mappings, each read for the keys ``Name``,
            ``Type``, ``Address`` and ``District``.

    Returns:
        A prompt made of the quoted query, one bullet line per result, and
        a closing instruction asking for a friendly, informative summary.

    Raises:
        KeyError: If a result lacks one of the four keys listed above.
    """
    bullet_lines = []
    for place in results:
        bullet_lines.append(
            f"- {place['Name']} ({place['Type']}) "
            f"по адресу: {place['Address']}, {place['District']}"
        )
    places_text = "\n".join(bullet_lines)

    header = f'Запрос: "{query}"\n'
    listing = f"Топ результатов по запросу:\n{places_text}\n"
    instruction = "Суммаризируй информацию в дружелюбной и информативной форме."
    return header + listing + instruction

In [None]:
# Example question, passed unchanged to the search helper.
query = "Где находится Мастерская Рыбникова?"

# Places returned by the RAG search for the query.
# NOTE(review): format_prompt reads 'Name', 'Type', 'Address' and 'District'
# from each item — confirm search_places returns mappings with those keys.
retrieved_data = rag.search_places(query)

# Build the LLM prompt from the query and the retrieved places, then show it.
prompt = format_prompt(query, retrieved_data)
print(f"*Prompt:*\n\n{prompt}")

In [None]:
from transformers import pipeline

# Text-generation pipeline built from the model id and placed on DEVICE.
# NOTE(review): passing the id makes the pipeline load its own copy of the
# weights; the `llm_model` / `tokenizer` objects loaded in an earlier cell are
# not reused — confirm this second load is intended.
qwen_pipeline = pipeline("text-generation", model=LLM_MODEL, device=DEVICE)

# Cap the number of *generated* tokens. `max_length` also counts the prompt
# tokens, so a long list of retrieved places would shrink (or use up) the
# budget left for the answer. return_full_text=False keeps only the
# continuation, without echoing the prompt.
response = qwen_pipeline(prompt, max_new_tokens=512, return_full_text=False)

print(response[0]["generated_text"])