In [44]:
import os

import pandas as pd
import tiktoken

from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
    read_indexer_covariates,
    read_indexer_entities,
    read_indexer_relationships,
    read_indexer_reports,
    read_indexer_text_units,
)
from graphrag.query.input.loaders.dfs import (
    store_entity_semantic_embeddings,
)
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import (
    LocalSearchMixedContext,
)
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore

In [45]:
# Directory holding the indexer's parquet output; the LanceDB location sits directly under it.
INPUT_DIR = "../../graphrag/output"
LANCEDB_URI = INPUT_DIR + "/lancedb"

# Community level passed to the entity and report readers in the cells below.
COMMUNITY_LEVEL = 2

# Parquet table names (file stems; ".parquet" is appended where each table is read).
ENTITY_TABLE = "create_final_nodes"
ENTITY_EMBEDDING_TABLE = "create_final_entities"
RELATIONSHIP_TABLE = "create_final_relationships"
COVARIATE_TABLE = "create_final_covariates"
TEXT_UNIT_TABLE = "create_final_text_units"
COMMUNITY_REPORT_TABLE = "create_final_community_reports"

In [46]:
# Read the nodes table (community and degree data) and the table named by
# ENTITY_EMBEDDING_TABLE from INPUT_DIR.
nodes_parquet = f"{INPUT_DIR}/{ENTITY_TABLE}.parquet"
entity_embedding_parquet = f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet"
entity_df = pd.read_parquet(nodes_parquet)
entity_embedding_df = pd.read_parquet(entity_embedding_parquet)

# Both frames plus COMMUNITY_LEVEL are handed to the indexer adapter.
entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)

# Vector store for the entity description embeddings, connected to the LanceDB
# location given by LANCEDB_URI.
# To connect to a remote db, specify url and port values.
description_embedding_store = LanceDBVectorStore(collection_name="entity_description_embeddings")
description_embedding_store.connect(db_uri=LANCEDB_URI)
entity_description_embeddings = store_entity_semantic_embeddings(
    vectorstore=description_embedding_store,
    entities=entities,
)

# Report the row count of the nodes table, then show a preview as the cell output.
print(f"Entity count: {len(entity_df)}")
entity_df.head()

Entity count: 7428


Unnamed: 0,level,title,type,description,source_id,community,degree,human_readable_id,id,size,graph_embedding,entity_type,top_level_node_id,x,y
0,0,FRENADO AUTÓNOMO DE EMERGENCIA (AEB),SAFETY_FEATURES,"El ""FRENADO AUTÓNOMO DE EMERGENCIA (AEB)"" es u...","2ba32e8ec63c9fe2cc32b86a604bcb58,9839e8e25dc77...",1,2,0,dcf718d0d10e4c3cb42d529d37e13adf,2,,,dcf718d0d10e4c3cb42d529d37e13adf,0,0
1,0,CÁMARA DE VISIÓN TRASERA,SAFETY_FEATURES,Cámara que proporciona visión trasera para asi...,a257fc6f2b0bbf4b96397f43513c8ad1,2,1,1,a2b090fc690341168844f23f00db1296,1,,,a2b090fc690341168844f23f00db1296,0,0
2,0,SENSORES DE PARQUEO DELANTEROS Y TRASEROS,SAFETY_FEATURES,"Los ""SENSORES DE PARQUEO DELANTEROS Y TRASEROS...","01d93cf85102e8d901cb0d9d8d5baca1,164dab1ac8364...",2,4,2,6572cfa1972d434fa1dcb4fe1d9c8520,4,,,6572cfa1972d434fa1dcb4fe1d9c8520,0,0
3,0,FRENO DE PARQUEO ELÉCTRICO,COMFORT_FEATURES,Freno de parqueo con activación eléctrica,a257fc6f2b0bbf4b96397f43513c8ad1,2,1,3,75a9594ab97d42138f5640f46d92665f,1,,,75a9594ab97d42138f5640f46d92665f,0,0
4,0,TARJETA MANOS LIBRES PARA ACCESO Y ARRANQUE,COMFORT_FEATURES,Sistema de acceso y arranque sin llave mediant...,a257fc6f2b0bbf4b96397f43513c8ad1,2,1,4,1671b31c48fb4879af2be95882a6e1f4,1,,,1671b31c48fb4879af2be95882a6e1f4,0,0


In [47]:
# Read the relationships table and convert it with the indexer adapter.
relationship_parquet = f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet"
relationship_df = pd.read_parquet(relationship_parquet)
relationships = read_indexer_relationships(relationship_df)

# Row count of the raw table, then a preview as the cell output.
print(f"Relationship count: {len(relationship_df)}")
relationship_df.head()

Relationship count: 918


Unnamed: 0,source,target,weight,description,text_unit_ids,id,human_readable_id,source_degree,target_degree,rank
0,FRENADO AUTÓNOMO DE EMERGENCIA (AEB),RENAULT KARDIAN TECHNO,8.0,El modelo Renault Kardian Techno incluye el si...,[a257fc6f2b0bbf4b96397f43513c8ad1],2222a19e8ae44b0da3b8a8755ec8ba9a,0,2,38,40
1,FRENADO AUTÓNOMO DE EMERGENCIA (AEB),RENAULT KOLEOS,9.0,El frenado autónomo de emergencia es una carac...,[f8b6bbaa70d915076d89429fe8a333c2],ddaca8431c4c49e5aed6e93f18ba2323,1,2,51,53
2,CÁMARA DE VISIÓN TRASERA,RENAULT KARDIAN TECHNO,7.0,El modelo Renault Kardian Techno incluye una c...,[a257fc6f2b0bbf4b96397f43513c8ad1],2f739edbcf5c455babc75ba8f7a871f1,2,1,38,39
3,SENSORES DE PARQUEO DELANTEROS Y TRASEROS,RENAULT KARDIAN TECHNO,7.0,El modelo Renault Kardian Techno incluye senso...,[a257fc6f2b0bbf4b96397f43513c8ad1],d04aa8e4aaf44b74bd2d6329ec62e91d,3,4,38,42
4,SENSORES DE PARQUEO DELANTEROS Y TRASEROS,AUDI RS3 SEDÁN,7.0,El Audi RS3 Sedán está equipado con sensores d...,[d8c45c3cd3e468a982745a8975823c09],576b2a5e46de46b8a05b3901f09377fa,4,4,46,50


In [48]:
# Read the community reports table; the adapter also receives the nodes frame
# (entity_df) and COMMUNITY_LEVEL.
report_parquet = f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet"
report_df = pd.read_parquet(report_parquet)
reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)

# Row count of the raw reports table, then a preview as the cell output.
print(f"Report records: {len(report_df)}")
report_df.head()

Report records: 107


Unnamed: 0,community,full_content,level,rank,title,rank_explanation,summary,findings,full_content_json,id
0,105,# Renault Koleos and Koleos Intens CVT 4X4\n\n...,3,6.5,Renault Koleos and Koleos Intens CVT 4X4,The rating reflects a moderate level of signif...,The community is centered around the Renault K...,[{'explanation': 'The Renault Koleos serves as...,"{\n ""title"": ""Renault Koleos and Koleos Int...",3bd96e87-4aa8-4f21-aa00-fcc32c96c5d2
1,106,# Renault Koleos and its Advanced Features\n\n...,3,8.5,Renault Koleos and its Advanced Features,The rating reflects the comprehensive integrat...,The community is centered around the Renault K...,[{'explanation': 'The Renault Koleos is equipp...,"{\n ""title"": ""Renault Koleos and its Advanc...",c659cc51-de97-43fd-bc99-19874be8fa36
2,100,# MG ZS EV y sus Características Avanzadas\n\n...,2,8.5,MG ZS EV y sus Características Avanzadas,El análisis es altamente relevante para la ind...,"La comunidad se centra en el MG ZS EV, un vehí...",[{'explanation': 'El MG ZS EV incorpora múltip...,"{\n ""title"": ""MG ZS EV y sus Característica...",4748d80f-f4e8-4203-abd0-a368604649c2
3,101,# Sistemas de Seguridad: Monitoreo de Punto Ci...,2,7.5,Sistemas de Seguridad: Monitoreo de Punto Cieg...,La relevancia es alta debido a la importancia ...,La comunidad se centra en los sistemas de segu...,[{'explanation': 'El Monitoreo de Punto Ciego ...,"{\n ""title"": ""Sistemas de Seguridad: Monito...",184aa8d0-dbf0-4b4d-9524-3a09d0f7e7f6
4,102,# Características de Confort en Vehículos: Asi...,2,6.5,Características de Confort en Vehículos: Asien...,El análisis es relevante para entender las ten...,La comunidad se centra en las características ...,[{'explanation': 'Los asientos traseros abatib...,"{\n ""title"": ""Características de Confort en...",adc17aa3-01a6-4a98-8889-2e4936fe9444


In [49]:
# Read the text units table and convert it with the indexer adapter.
text_unit_parquet = f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet"
text_unit_df = pd.read_parquet(text_unit_parquet)
text_units = read_indexer_text_units(text_unit_df)

# Row count of the raw table, then a preview as the cell output.
print(f"Text unit records: {len(text_unit_df)}")
text_unit_df.head()

Text unit records: 82


Unnamed: 0,id,text,n_tokens,document_ids,entity_ids,relationship_ids
0,6841aba1206e1df18c0bbe61f96ba6ca,**Brand**: *Renault* \n**Model**: *Kardian* ...,1200,[059564634fef34aae9ffc86a5313ebee],,
1,a257fc6f2b0bbf4b96397f43513c8ad1,ecnología**: Stop & Start\n\n---\n**3. Renault...,1039,[059564634fef34aae9ffc86a5313ebee],"[dcf718d0d10e4c3cb42d529d37e13adf, a2b090fc690...","[2222a19e8ae44b0da3b8a8755ec8ba9a, 2f739edbcf5..."
2,39449d4d2d13ed0c2cf6a3b65cf615ae,Audi Q5 Sportback 45 TFSI quattro MHEV Prestig...,1161,[0788158cfd2a5c9ed034ed23197376db],"[6572cfa1972d434fa1dcb4fe1d9c8520, 30e338de088...","[e5297b985c324899a3e3a6e663593f51, f56329c3ca2..."
3,04f9ebb008ac1ca0ad1998df0765e861,"aluminio ""Rhombus""\n- Apoyabrazos delantero d...",61,[0788158cfd2a5c9ed034ed23197376db],"[d5f8178b88d84310906e488309a12dd0, 88ae3687088...",
4,d8c45c3cd3e468a982745a8975823c09,Audi RS3 Sedán - Ficha Técnica Completa - Mode...,1097,[0e61a0ff2c1f30b0e0d5748c06c90cae],"[6572cfa1972d434fa1dcb4fe1d9c8520, 1dd60200888...","[576b2a5e46de46b8a05b3901f09377fa, 97d347f6e89..."


In [50]:
# The API key and both model names are read from the environment;
# os.environ[...] raises KeyError when a variable is not set.
api_key = os.environ["GRAPHRAG_API_KEY"]
llm_model = os.environ["GRAPHRAG_LLM_MODEL"]
embedding_model = os.environ["GRAPHRAG_EMBEDDING_MODEL"]

# Settings that the chat client and the embedding client have in common.
shared_client_kwargs = {
    "api_key": api_key,
    "api_type": OpenaiApiType.OpenAI,  # OpenaiApiType.OpenAI or OpenaiApiType.AzureOpenAI
    "max_retries": 20,
}

# Chat model used by the search engines.
llm = ChatOpenAI(model=llm_model, **shared_client_kwargs)

# Tokenizer handed to the context builders and search engines.
token_encoder = tiktoken.get_encoding("cl100k_base")

# Embedding client; the embedding model name doubles as the deployment name.
text_embedder = OpenAIEmbedding(
    api_base=None,
    model=embedding_model,
    deployment_name=embedding_model,
    **shared_client_kwargs,
)

In [51]:
# Context builder for local search, assembled from the objects loaded above.
# No covariates argument is passed here; supply covariates=... only if covariates
# were produced during indexing.
# embedding_vectorstore_key: use EntityVectorStoreKey.TITLE instead if the
# vectorstore uses entity title as ids.
context_builder = LocalSearchMixedContext(
    entities=entities,
    relationships=relationships,
    text_units=text_units,
    community_reports=reports,
    entity_text_embeddings=description_embedding_store,
    embedding_vectorstore_key=EntityVectorStoreKey.ID,
    text_embedder=text_embedder,
    token_encoder=token_encoder,
)

In [52]:
# Parameters forwarded to the local-search context builder.
# Whatever share of the context window is not claimed by text_unit_prop and
# community_prop goes to entities and relationships, so text_unit_prop + community_prop
# should be <= 1.
local_context_params = {
    # proportion of the context window dedicated to related text units
    "text_unit_prop": 0.5,
    # proportion of the context window dedicated to community reports
    "community_prop": 0.1,
    # maximum number of turns to include in the conversation history
    "conversation_history_max_turns": 5,
    # if True, only include user queries in the conversation history
    "conversation_history_user_turns_only": True,
    # number of related entities to retrieve from the entity description embedding store
    "top_k_mapped_entities": 20,
    # controls the number of out-of-network relationships pulled into the context window
    "top_k_relationships": 20,
    # if True, include the entity rank in the entity table in the context window
    # (default entity rank = node degree)
    "include_entity_rank": True,
    # if True, include the relationship weight in the context window
    "include_relationship_weight": True,
    # if True, include the community rank in the context window
    "include_community_rank": False,
    # if True, return a set of dataframes containing all candidate
    # entity/relationship/covariate records that could be relevant; not all of them end up
    # in the context window, and their "in_context" column marks the ones that do
    "return_candidate_context": False,
    # set this to EntityVectorStoreKey.TITLE if the vectorstore uses entity title as ids
    "embedding_vectorstore_key": EntityVectorStoreKey.ID,
    # maximum number of tokens to use for the context window; change this based on the
    # token limit of your model (with an 8k limit, a good setting could be 5000)
    "max_tokens": 12_000,
}

# Generation settings passed to the LLM by the local search engine.
# max_tokens: change this based on the token limit you have on your model
# (if you are using a model with 8k limit, a good setting could be 1000-1500).
llm_params = dict(
    max_tokens=2_000,
    temperature=0.0,
)

In [53]:
# Local search engine wired to the LLM, the mixed context builder and their parameters.
# response_type is free form text describing the response type and format; it can be
# anything, e.g. prioritized list, single paragraph, multiple paragraphs,
# multiple-page report.
search_engine = LocalSearch(
    llm=llm,
    llm_params=llm_params,
    context_builder=context_builder,
    context_builder_params=local_context_params,
    token_encoder=token_encoder,
    response_type="multiple paragraphs",
)

In [54]:
result = await search_engine.asearch("Recomiendame 5 carros hibridos que no excedan los 200 millones")
print(result.response)

En el mercado actual, los vehículos híbridos ofrecen una combinación de eficiencia de combustible y rendimiento, lo que los hace atractivos para los consumidores conscientes del medio ambiente. A continuación, te presento cinco opciones de vehículos híbridos que podrían ajustarse a un presupuesto de hasta 200 millones de pesos. Sin embargo, es importante tener en cuenta que los precios pueden variar según la región y las especificaciones del modelo.

### 1. **Renault Zoe E-Tech Iconic 2024**
El Renault Zoe E-Tech Iconic es un hatchback eléctrico que ofrece una autonomía de hasta 395 km en ciclo WLTP y hasta 400 km en ciudad. Está equipado con un motor eléctrico de 135 HP y una batería de iones de litio de 52 kWh. Este modelo es conocido por su eficiencia energética y su capacidad de carga rápida, lo que lo convierte en una opción atractiva para quienes buscan un vehículo eléctrico asequible [Data: Entities (1383); Sources (58)].

### 2. **MG 3 Hybrid +**
El MG 3 Hybrid + es un modelo 2

In [55]:
import os

import pandas as pd
import tiktoken

from graphrag.query.indexer_adapters import read_indexer_entities, read_indexer_reports
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.structured_search.global_search.community_context import (
    GlobalCommunityContext,
)
from graphrag.query.structured_search.global_search.search import GlobalSearch

In [56]:
# Context builder for global search. Note that this rebinds `context_builder`, which
# previously held the local-search context builder.
# entities: default to None if you don't want to use community weights for ranking.
context_builder = GlobalCommunityContext(
    token_encoder=token_encoder,
    community_reports=reports,
    entities=entities,
)

In [57]:
# Parameters forwarded to the global-search context builder.
context_builder_params = dict(
    # False means using full community reports. True means using community short summaries.
    use_community_summary=False,
    shuffle_data=True,
    include_community_rank=True,
    min_community_rank=0,
    community_rank_name="rank",
    include_community_weight=True,
    community_weight_name="occurrence weight",
    normalize_community_weight=True,
    # change this based on the token limit you have on your model
    # (if you are using a model with 8k limit, a good setting could be 5000)
    max_tokens=12_000,
    context_name="Reports",
)

# LLM settings for the map step of global search; the response format is set to a JSON object.
map_llm_params = dict(
    max_tokens=1_000,
    temperature=0.0,
    response_format={"type": "json_object"},
)

# LLM settings for the reduce step of global search.
# max_tokens: change this based on the token limit you have on your model
# (if you are using a model with 8k limit, a good setting could be 1000-1500).
reduce_llm_params = dict(
    max_tokens=2_000,
    temperature=0.0,
)

In [58]:
# Global search engine. Note that this rebinds `search_engine`, which previously held
# the local search engine.
#
# max_data_tokens: change this based on the token limit you have on your model
#   (if you are using a model with 8k limit, a good setting could be 5000).
# allow_general_knowledge: setting this to True will add instruction to encourage the
#   LLM to incorporate general knowledge in the response, which may increase
#   hallucinations, but could be useful in some use cases.
# json_mode: set this to False if your LLM model does not support JSON mode.
# response_type: free form text describing the response type and format, can be
#   anything, e.g. prioritized list, single paragraph, multiple paragraphs,
#   multiple-page report.
search_engine = GlobalSearch(
    llm=llm,
    context_builder=context_builder,
    context_builder_params=context_builder_params,
    token_encoder=token_encoder,
    max_data_tokens=12_000,
    map_llm_params=map_llm_params,
    reduce_llm_params=reduce_llm_params,
    allow_general_knowledge=False,
    json_mode=True,
    concurrent_coroutines=32,
    response_type="multiple paragraphs",
)

In [None]:
result = await search_engine.asearch(
    "Recomiendame 5 carros hibridos que no superen los 200 millones"
)

print(result.response)