In [None]:
import asyncio
import os

from ragu import (
    SimpleChunker,
    KnowledgeGraph,
    BuilderArguments,
    Settings,
    ArtifactsExtractorLLM,
)
from ragu.chunker.chunkers import SemanticTextChunker
from ragu.llm import OpenAIClient
from ragu.embedder import OpenAIEmbedder

from ragu.utils.ragu_utils import read_text_from_files

# --- Configuration -----------------------------------------------------------
# Endpoint and key are read from the environment (ragu.Env can be used instead
# to load them from a .env file). A missing variable raises KeyError here.
LLM_BASE_URL = os.environ["VSEGPT_BASE_URL"]
LLM_API_KEY = os.environ["VSEGPT_KEY"]

# Model identifiers. Embedding models: https://vsegpt.ru/Docs/Models/Embeddings
LLM_MODEL_NAME = "qwen/qwen3-14b"
EMBEDDER_MODEL_NAME = "emb-qwen/qwen3-embedding-8b"

# --- LLM client --------------------------------------------------------------
# Rate limits are set to 1 request/second and 60 requests/minute.
client = OpenAIClient(
    base_url=LLM_BASE_URL,
    api_token=LLM_API_KEY,
    model_name=LLM_MODEL_NAME,
    max_requests_per_minute=60,
    max_requests_per_second=1,
    # NOTE(review): presumably flushes the client's cache after every request — confirm.
    cache_flush_every=1,
)

In [None]:
# Configure working directory and language.
Settings.storage_folder = "bl_index"
Settings.language = "english"  # or "russian"

# Folder holding the source documents. The default is the original author's
# local path; set the RAGU_DOCS_DIR environment variable to point the notebook
# at another copy of the dataset without editing this cell.
DOCS_DIR = os.environ.get(
    "RAGU_DOCS_DIR",
    "/home/oleg/rag_workspace/natural_rag/datasets/bl_small/docs",
)

# Load documents from the folder.
docs = read_text_from_files(DOCS_DIR)

In [None]:
# Chunker handed to the KnowledgeGraph below.
# NOTE(review): 'all-mpnet-base-v2' looks like a sentence-transformers model
# name, and the unit of max_chunk_size is defined by SemanticTextChunker — confirm both.
chunker = SemanticTextChunker(
    "all-mpnet-base-v2",
    max_chunk_size=512,
)

In [None]:
# Extractor driven by the LLM client, with validation switched off.
artifact_extractor = ArtifactsExtractorLLM(do_validation=False, client=client)

# Embedding client: same endpoint and key as the LLM client, separate model.
embedder = OpenAIEmbedder(
    base_url=LLM_BASE_URL,
    api_token=LLM_API_KEY,
    model_name=EMBEDDER_MODEL_NAME,
    # NOTE(review): presumably the vector width produced by EMBEDDER_MODEL_NAME — confirm.
    dim=4096,
    max_requests_per_minute=60,
    max_requests_per_second=1,
    use_cache=True,
)

# Build options: LLM summarization and chunk vectorization are both enabled.
builder_settings = BuilderArguments(vectorize_chunks=True, use_llm_summarization=True)

# Wire every component into the graph object; nothing is built until
# build_from_docs is awaited in a later cell.
knowledge_graph = KnowledgeGraph(
    builder_settings=builder_settings,
    artifact_extractor=artifact_extractor,
    chunker=chunker,
    embedder=embedder,
    client=client,
)

In [None]:
# Build the graph from the loaded documents. The name `knowledge_graph` is
# rebound to whatever build_from_docs returns, so re-running this cell calls
# build_from_docs on that returned object, not on the freshly constructed graph.
knowledge_graph = await knowledge_graph.build_from_docs(docs)

In [None]:
from typing import Any, MutableMapping
from diskcache import Index

# diskcache Index created with the path 'database/', annotated as a generic mutable mapping.
# NOTE(review): `shelf` is not referenced by any other cell visible here — confirm it is still needed.
shelf: MutableMapping[str, Any] = Index('database/')

In [None]:
# NOTE(review): verbatim repeat of the earlier build cell. On a top-to-bottom
# run this invokes build_from_docs a second time, on the object returned by the
# first call — confirm the rebuild is intended, otherwise drop this cell.
knowledge_graph = await knowledge_graph.build_from_docs(docs)

In [None]:
from ragu import LocalSearchEngine

local_search = LocalSearchEngine(
    client,
    knowledge_graph,
    embedder,
    tokenizer_model="gpt-4o-mini",
)
query = '''\
<конец секции документов>
Как попас
'''
print(query)
context = await local_search.a_search(query, top_k=20)
answer = await local_search.a_query(quert, top_k=20)
print(answer)