In [1]:
import os

import pandas as pd
import tiktoken

from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
    read_indexer_covariates,
    read_indexer_entities,
    read_indexer_relationships,
    read_indexer_reports,
    read_indexer_text_units,
)
from graphrag.query.input.loaders.dfs import (
    store_entity_semantic_embeddings,
)
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import (
    LocalSearchMixedContext,
)
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Root folder of the indexer output; the LanceDB store sits directly beneath it.
INPUT_DIR = "../../ragtest/output"
LANCEDB_URI = INPUT_DIR + "/lancedb"

# Community level handed to the entity and report readers in later cells.
COMMUNITY_LEVEL = 2

# Names of the indexer output tables. The loading cells append ".parquet" to
# these and read them from INPUT_DIR.
ENTITY_TABLE = "create_final_nodes"
ENTITY_EMBEDDING_TABLE = "create_final_entities"
RELATIONSHIP_TABLE = "create_final_relationships"
COVARIATE_TABLE = "create_final_covariates"
COMMUNITY_REPORT_TABLE = "create_final_community_reports"
TEXT_UNIT_TABLE = "create_final_text_units"

In [3]:
# The nodes table supplies community and degree data; it is combined with the
# entities table to build the entity objects at COMMUNITY_LEVEL.
entity_df = pd.read_parquet(
    f"{INPUT_DIR}/{ENTITY_TABLE}.parquet"
)
entity_embedding_df = pd.read_parquet(
    f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet"
)
entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)

# Open the LanceDB store at LANCEDB_URI and store the entity description
# embeddings in the "entity_description_embeddings" collection.
# NOTE(review): the original comment mentioned url/port values for a remote
# db — confirm against the vector store API before relying on that.
description_embedding_store = LanceDBVectorStore(
    collection_name="entity_description_embeddings"
)
description_embedding_store.connect(db_uri=LANCEDB_URI)
entity_description_embeddings = store_entity_semantic_embeddings(
    vectorstore=description_embedding_store,
    entities=entities,
)

# Row count of the raw nodes table, followed by a preview of its first rows.
print("Entity count:", len(entity_df))
entity_df.head()

Entity count: 6336


Unnamed: 0,level,title,type,description,source_id,community,degree,human_readable_id,id,size,graph_embedding,entity_type,top_level_node_id,x,y
0,0,RENAULT KARDIAN,VEHICLE MODEL,Modelo de vehículo Renault Kardian para el año...,6841aba1206e1df18c0bbe61f96ba6ca,15,16,0,e8c37e4f17cf4124a9dc05f3ee112274,16,,,e8c37e4f17cf4124a9dc05f3ee112274,0,0
1,0,$79.990.000,PRICE,El monto de $79.990.000 se refiere al precio i...,"6841aba1206e1df18c0bbe61f96ba6ca,8399732e82380...",15,3,1,cca5c8d573244621a4e020a265d0ce5b,3,,,cca5c8d573244621a4e020a265d0ce5b,0,0
2,0,$88.990.000,PRICE,El precio inicial de $88.990.000 corresponde a...,"6841aba1206e1df18c0bbe61f96ba6ca,de91d654670b5...",15,2,2,c113a4518fed4c74a1a4eb93ddbb9dde,2,,,c113a4518fed4c74a1a4eb93ddbb9dde,0,0
3,0,$94.990.000,PRICE,El precio inicial para el modelo MG 3 Hybrid +...,"6841aba1206e1df18c0bbe61f96ba6ca,c307fe3e5aa1e...",15,2,3,43152fe78b604ee89236cf713a7a583b,2,,,43152fe78b604ee89236cf713a7a583b,0,0
4,0,$99.990.000,PRICE,El monto de $99.990.000 se refiere al precio i...,"6841aba1206e1df18c0bbe61f96ba6ca,983f218801e2f...",15,2,4,06180e015b15483a81108a40c9cdab74,2,,,06180e015b15483a81108a40c9cdab74,0,0


In [4]:
# Read the relationships table from INPUT_DIR and convert it with the indexer adapter.
relationship_df = pd.read_parquet(
    f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet"
)
relationships = read_indexer_relationships(relationship_df)

# Row count of the raw table, followed by a preview of its first rows.
print("Relationship count:", len(relationship_df))
relationship_df.head()

Relationship count: 1777


Unnamed: 0,source,target,weight,description,text_unit_ids,id,human_readable_id,source_degree,target_degree,rank
0,RENAULT KARDIAN,$79.990.000,8.0,El precio inicial de la Renault Kardian Evolut...,[6841aba1206e1df18c0bbe61f96ba6ca],e84b819c9420403e9538a7d3ae7bfca6,0,16,3,19
1,RENAULT KARDIAN,$88.990.000,8.0,El precio inicial de la Renault Kardian Evolut...,[6841aba1206e1df18c0bbe61f96ba6ca],bc15f93785764a1f8cffc5e42a92102b,1,16,2,18
2,RENAULT KARDIAN,$94.990.000,8.0,El precio inicial de la Renault Kardian Techno...,[6841aba1206e1df18c0bbe61f96ba6ca],20effb9ca7d24727a271814984629d23,2,16,2,18
3,RENAULT KARDIAN,$99.990.000,8.0,El precio inicial de la Renault Kardian Premie...,[6841aba1206e1df18c0bbe61f96ba6ca],350a3b759afc46a190ea46fef9c0b612,3,16,2,18
4,RENAULT KARDIAN,SCE 1.6L 16V,9.0,La Renault Kardian Evolution MT está equipada ...,[6841aba1206e1df18c0bbe61f96ba6ca],e2d3f28d34824e81818d2d9599830467,4,16,2,18


In [5]:
# Read the community reports table; the adapter also takes the nodes table
# (entity_df) and the community level.
report_df = pd.read_parquet(
    f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet"
)
reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)

# Row count of the raw table, followed by a preview of its first rows.
print("Report records:", len(report_df))
report_df.head()

Report records: 156


Unnamed: 0,community,full_content,level,rank,title,rank_explanation,summary,findings,full_content_json,id
0,154,# Renault Logan 2024: Especificaciones y Confi...,3,9.0,Renault Logan 2024: Especificaciones y Configu...,La relevancia es alta debido a la diversidad d...,La comunidad del Renault Logan 2024 se centra ...,[{'explanation': 'El Renault Logan 2024 se ofr...,"{\n ""title"": ""Renault Logan 2024: Especific...",a28afe20-bd81-4841-8a98-9cb805f28396
1,155,# Renault Trafic 2024 y Faros Full LED\n\nLa c...,3,8.5,Renault Trafic 2024 y Faros Full LED,La calificación es alta debido a la relevancia...,La comunidad se centra en el Renault Trafic 20...,[{'explanation': 'Los faros Full LED son una c...,"{\n ""title"": ""Renault Trafic 2024 y Faros F...",9e8b43c4-3170-4ff9-a6c0-b608e8fc1260
2,103,# Accesorios del Volkswagen Polo Track y su Im...,2,8.5,Accesorios del Volkswagen Polo Track y su Impacto,La calificación es alta debido a la relevancia...,La comunidad se centra en los accesorios inter...,[{'explanation': 'Los tapetes de goma termofor...,"{\n ""title"": ""Accesorios del Volkswagen Pol...",ef2f287a-ca36-4002-bf4a-33e73ca89dff
3,104,# Renault Logan y Sistemas de Entretenimiento\...,2,8.5,Renault Logan y Sistemas de Entretenimiento,La calificación es alta debido a la relevancia...,La comunidad se centra en el Renault Logan y s...,[{'explanation': 'El reproductor de DVD de 9 p...,"{\n ""title"": ""Renault Logan y Sistemas de E...",42d3d615-8273-4876-9770-0b289139d210
4,105,# Renault Trafic 2024 y Servicios de Financiac...,2,8.5,Renault Trafic 2024 y Servicios de Financiación,La calificación es alta debido a la relevancia...,La comunidad se centra en el Renault Trafic 20...,[{'explanation': 'Renault Crédito es un servic...,"{\n ""title"": ""Renault Trafic 2024 y Servici...",8815c16f-8e1f-41f7-92c7-23b13b227729


In [6]:
# Read the text units table from INPUT_DIR and convert it with the indexer adapter.
text_unit_df = pd.read_parquet(
    f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet"
)
text_units = read_indexer_text_units(text_unit_df)

# Row count of the raw table, followed by a preview of its first rows.
print("Text unit records:", len(text_unit_df))
text_unit_df.head()

Text unit records: 82


Unnamed: 0,id,text,n_tokens,document_ids,entity_ids,relationship_ids
0,6841aba1206e1df18c0bbe61f96ba6ca,**Brand**: *Renault* \n**Model**: *Kardian* ...,1200,[059564634fef34aae9ffc86a5313ebee],"[e8c37e4f17cf4124a9dc05f3ee112274, cca5c8d5732...","[e84b819c9420403e9538a7d3ae7bfca6, bc15f937857..."
1,a257fc6f2b0bbf4b96397f43513c8ad1,ecnología**: Stop & Start\n\n---\n**3. Renault...,1039,[059564634fef34aae9ffc86a5313ebee],"[4a6e6967453b4045bac684f2f38002a5, ea553af17dc...","[f1f9fa0823bd4216ba1e5df174781871, 4e9a868bdf9..."
2,39449d4d2d13ed0c2cf6a3b65cf615ae,Audi Q5 Sportback 45 TFSI quattro MHEV Prestig...,1161,[0788158cfd2a5c9ed034ed23197376db],"[4a6e6967453b4045bac684f2f38002a5, 9854b91c3de...","[0e9b5f6736284a8b867a8f4123e330ba, 4f1cbf39e6e..."
3,04f9ebb008ac1ca0ad1998df0765e861,"aluminio ""Rhombus""\n- Apoyabrazos delantero d...",61,[0788158cfd2a5c9ed034ed23197376db],"[3d7342614f75403798eb20c865262ff3, d54a09715fe...","[6c79baf573714764be7c8c22c2c33ef8, 1de7988e0f5..."
4,d8c45c3cd3e468a982745a8975823c09,Audi RS3 Sedán - Ficha Técnica Completa - Mode...,1097,[0e61a0ff2c1f30b0e0d5748c06c90cae],"[6cff592317e64e998562543cad0901d2, 773a64d52ce...","[63a0c47df9e64e79876d08dc5b8c6479, a902faac9aa..."


In [7]:
# Credentials and model names are read from the environment; a missing
# variable raises KeyError at this point.
api_key = os.environ["GRAPHRAG_API_KEY"]
llm_model = os.environ["GRAPHRAG_LLM_MODEL"]
embedding_model = os.environ["GRAPHRAG_EMBEDDING_MODEL"]

# Chat model client. api_type can be OpenaiApiType.OpenAI or
# OpenaiApiType.AzureOpenAI.
llm = ChatOpenAI(
    model=llm_model,
    api_key=api_key,
    api_type=OpenaiApiType.OpenAI,
    max_retries=20,
)

# Token encoder shared by the context builder and the search engine.
token_encoder = tiktoken.get_encoding("cl100k_base")

# Embedding client; the embedding model name is also used as the deployment name.
text_embedder = OpenAIEmbedding(
    model=embedding_model,
    deployment_name=embedding_model,
    api_key=api_key,
    api_base=None,
    api_type=OpenaiApiType.OpenAI,
    max_retries=20,
)

In [8]:
# Wire the loaded community reports, text units, entities and relationships,
# plus the entity description vector store, the text embedder and the token
# encoder, into the local-search context builder.
context_builder = LocalSearchMixedContext(
    community_reports=reports,
    text_units=text_units,
    entities=entities,
    relationships=relationships,
    # This notebook never loads covariates, so None is passed explicitly
    # instead of leaving a commented-out reference to an undefined variable.
    # NOTE(review): to enable covariates, load COVARIATE_TABLE (presumably via
    # read_indexer_covariates) and pass the result here — confirm the expected shape.
    covariates=None,
    entity_text_embeddings=description_embedding_store,
    # Use EntityVectorStoreKey.TITLE instead if the vector store uses entity titles as ids.
    embedding_vectorstore_key=EntityVectorStoreKey.ID,
    text_embedder=text_embedder,
    token_encoder=token_encoder,
)

In [9]:
# Parameters handed to LocalSearch as context_builder_params.
# text_unit_prop + community_prop should be <= 1; the remaining proportion of
# the context window is dedicated to entities and relationships.
local_context_params = dict(
    # Proportion of the context window dedicated to related text units.
    text_unit_prop=0.5,
    # Proportion of the context window dedicated to community reports.
    community_prop=0.1,
    # Maximum number of turns to include in the conversation history.
    conversation_history_max_turns=5,
    # If True, only user queries are included in the conversation history.
    conversation_history_user_turns_only=True,
    # Number of related entities to retrieve from the entity description
    # embedding store.
    top_k_mapped_entities=10,
    # Controls the number of out-of-network relationships pulled into the
    # context window.
    top_k_relationships=10,
    # If True, include the entity rank in the entity table in the context
    # window. Default entity rank = node degree.
    include_entity_rank=True,
    # If True, include the relationship weight in the context window.
    include_relationship_weight=True,
    # If True, include the community rank in the context window.
    include_community_rank=False,
    # If True, return a set of dataframes containing all candidate
    # entity/relationship/covariate records that could be relevant. Not all of
    # these records end up in the context window; the "in_context" column in
    # those dataframes indicates whether a record was included.
    return_candidate_context=False,
    # Set to EntityVectorStoreKey.TITLE if the vector store uses entity titles as ids.
    embedding_vectorstore_key=EntityVectorStoreKey.ID,
    # Maximum number of tokens to use for the context window. Change this based
    # on your model's token limit (with an 8k limit, 5000 could be a good setting).
    max_tokens=12_000,
)

# Parameters handed to LocalSearch as llm_params.
llm_params = dict(
    # Change this based on your model's token limit (with an 8k limit, a good
    # setting could be 1000-1500).
    max_tokens=2_000,
    temperature=0.0,
)

In [10]:
# Assemble the local search engine from the LLM client, the context builder,
# the token encoder and the two parameter dictionaries.
# response_type is free-form text describing the response type and format; it
# can be anything, e.g. prioritized list, single paragraph, multiple
# paragraphs, multiple-page report.
search_engine = LocalSearch(
    llm=llm,
    llm_params=llm_params,
    context_builder=context_builder,
    context_builder_params=local_context_params,
    token_encoder=token_encoder,
    response_type="multiple paragraphs",
)

In [20]:
# Run one local-search query. The top-level `await` relies on the notebook
# (IPython) kernel, which allows awaiting directly in a cell; in a plain
# Python script this statement would need to live inside an async function.
result = await search_engine.asearch("recomiendame automoviles de 300 millones ")
# Show the `response` attribute of the search result.
print(result.response)

Para un presupuesto de aproximadamente 300 millones, hay varias opciones de automóviles que destacan por sus características avanzadas y su rendimiento. A continuación, te presento algunas recomendaciones basadas en los datos disponibles:

### Audi RS3 Sportback

El Audi RS3 Sportback es una opción destacada en este rango de precios, con un precio inicial de $299.900.000 [Data: Entities (717); Relationships (1176)]. Este modelo es conocido por su potente motor de cinco cilindros en línea, que ofrece una potencia máxima de 400 HP y un torque de 500 Nm. La aceleración de 0 a 100 km/h se logra en solo 3.8 segundos, y su velocidad máxima es de 250 km/h [Data: Sources (31)]. Además, el RS3 Sportback cuenta con un sistema de tracción quattro® y una transmisión S tronic de 7 velocidades, lo que garantiza un manejo dinámico y seguro.

### Audi RS3 Sedán

Otra opción dentro de este rango de precios es el Audi RS3 Sedán, con un precio inicial de $319.900.000 [Data: Entities (131); Relationships 