In [1]:
import os

import pandas as pd
import tiktoken

from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
    read_indexer_covariates,
    read_indexer_entities,
    read_indexer_relationships,
    read_indexer_reports,
    read_indexer_text_units,
)
from graphrag.query.input.loaders.dfs import (
    store_entity_semantic_embeddings,
)
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import (
    LocalSearchMixedContext,
)
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Directory holding the indexing output; the LanceDB location is derived from it.
INPUT_DIR = "../../ragtest/output"
LANCEDB_URI = INPUT_DIR + "/lancedb"

# Base names (without the ".parquet" suffix) of the indexer output tables.
# NOTE(review): COVARIATE_TABLE is declared but not read by the cells visible here.
COMMUNITY_REPORT_TABLE = "create_final_community_reports"
ENTITY_TABLE = "create_final_nodes"
ENTITY_EMBEDDING_TABLE = "create_final_entities"
RELATIONSHIP_TABLE = "create_final_relationships"
COVARIATE_TABLE = "create_final_covariates"
TEXT_UNIT_TABLE = "create_final_text_units"

# Community level handed to the entity and report readers.
COMMUNITY_LEVEL = 2

In [3]:
# Load the nodes table (source of community and degree data) and the table
# named by ENTITY_EMBEDDING_TABLE, then build the entity list for COMMUNITY_LEVEL.
entity_table_path = f"{INPUT_DIR}/{ENTITY_TABLE}.parquet"
entity_embedding_table_path = f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet"
entity_df = pd.read_parquet(entity_table_path)
entity_embedding_df = pd.read_parquet(entity_embedding_table_path)

entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)

# Put the entity description embeddings into a LanceDB vector store.
# The store is connected to LANCEDB_URI, a path under INPUT_DIR (not an
# in-memory database). Per the original note, a remote db would be reached
# by specifying url and port values instead.
description_embedding_store = LanceDBVectorStore(
    collection_name="entity_description_embeddings"
)
description_embedding_store.connect(db_uri=LANCEDB_URI)
entity_description_embeddings = store_entity_semantic_embeddings(
    entities=entities,
    vectorstore=description_embedding_store,
)

# The figure printed here is the row count of the nodes table.
print(f"Entity count: {len(entity_df)}")
entity_df.head()

Entity count: 3462


Unnamed: 0,level,title,type,description,source_id,community,degree,human_readable_id,id,size,graph_embedding,entity_type,top_level_node_id,x,y
0,0,RENAULT MASTER,VEHICLE MODEL,Modelo de vehículo Renault Master para el año ...,12ddd42462c1d1454f0a376c793f0d9d,16,46,0,f2be3c7562ec4aabb5a634ee57cb9fc0,46,,,f2be3c7562ec4aabb5a634ee57cb9fc0,0,0
1,0,$189.990.000,PRICE,Precio inicial para la versión Renault Master ...,12ddd42462c1d1454f0a376c793f0d9d,16,1,1,e2096bc79a7948c4bb1f85b127049a3d,1,,,e2096bc79a7948c4bb1f85b127049a3d,0,0
2,0,$208.990.000,PRICE,Precio inicial para la versión Renault Master ...,12ddd42462c1d1454f0a376c793f0d9d,16,1,2,0cf964f50bc34ce09ae518ec26f787e5,1,,,0cf964f50bc34ce09ae518ec26f787e5,0,0
3,0,FURGÓN COMERCIAL LIGERO,BODY TYPE,"Tipo de carrocería de la Renault Master, diseñ...",12ddd42462c1d1454f0a376c793f0d9d,16,1,3,04dfc2c2ae01409d81c7acb5e69aadfb,1,,,04dfc2c2ae01409d81c7acb5e69aadfb,0,0
4,0,2.3L TURBO DIÉSEL DCI,ENGINE TYPE,"Motor de 2.3 litros, turbo diésel, con 4 cilin...",12ddd42462c1d1454f0a376c793f0d9d,16,1,4,8e46349240f24425b6bc1df2031323a0,1,,,8e46349240f24425b6bc1df2031323a0,0,0


In [4]:
# Load the relationships table and run it through the indexer adapter; the
# resulting `relationships` value is handed to the context builder later on.
relationship_table_path = f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet"
relationship_df = pd.read_parquet(relationship_table_path)
relationships = read_indexer_relationships(relationship_df)

# Report the table size, then preview the first rows as the cell output.
print(f"Relationship count: {len(relationship_df)}")
relationship_df.head()

Relationship count: 1259


Unnamed: 0,source,target,weight,description,text_unit_ids,id,human_readable_id,source_degree,target_degree,rank
0,RENAULT MASTER,$189.990.000,8.0,El precio inicial de la Renault Master Maxi Ca...,[12ddd42462c1d1454f0a376c793f0d9d],8dbf0d8467804ee08014e63ff9e2e0b8,0,46,1,47
1,RENAULT MASTER,$208.990.000,8.0,El precio inicial de la Renault Master Plus Ca...,[12ddd42462c1d1454f0a376c793f0d9d],05fdb7b933c8476690ed3d00f6d72cae,1,46,1,47
2,RENAULT MASTER,FURGÓN COMERCIAL LIGERO,9.0,La Renault Master es un furgón comercial ligero,[12ddd42462c1d1454f0a376c793f0d9d],237f599bf8c14872b3b1cd781fb54b79,2,46,1,47
3,RENAULT MASTER,2.3L TURBO DIÉSEL DCI,9.0,La Renault Master está equipada con un motor 2...,[12ddd42462c1d1454f0a376c793f0d9d],e61c1b3db59641aca72114c587cccc72,3,46,1,47
4,RENAULT MASTER,MANUAL DE 6 VELOCIDADES,8.0,La Renault Master utiliza una transmisión manu...,[12ddd42462c1d1454f0a376c793f0d9d],2268c1c589d940d697a22ef9c891f1e2,4,46,4,50


In [5]:
# Load the community reports table; the adapter also receives the nodes table
# (entity_df) and COMMUNITY_LEVEL to build `reports`.
report_table_path = f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet"
report_df = pd.read_parquet(report_table_path)
reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)

# Report the table size, then preview the first rows as the cell output.
print(f"Report records: {len(report_df)}")
report_df.head()

Report records: 107


Unnamed: 0,community,full_content,level,rank,title,rank_explanation,summary,findings,full_content_json,id
0,100,# Renault Trafic 2024: Innovaciones y Opciones...,2,8.5,Renault Trafic 2024: Innovaciones y Opciones,La calificación es alta debido a la relevancia...,La comunidad se centra en el modelo Renault Tr...,[{'explanation': 'El Cargador Inalámbrico para...,"{\n ""title"": ""Renault Trafic 2024: Innovaci...",5f7f705b-11d4-45ee-9bb6-352b8338688e
1,101,# Renault Trafic 2024: Innovaciones en Ilumina...,2,8.5,Renault Trafic 2024: Innovaciones en Iluminación,La calificación es alta debido a la relevancia...,La comunidad se centra en las especificaciones...,[{'explanation': 'Los faros delanteros Full LE...,"{\n ""title"": ""Renault Trafic 2024: Innovaci...",89895199-7d23-41a2-9d37-8eea1f9471de
2,102,# Renault Trafic 2024 y Volkswagen\n\nLa comun...,2,8.5,Renault Trafic 2024 y Volkswagen,La calificación es alta debido a la relevancia...,La comunidad se centra en el análisis de las e...,[{'explanation': 'El precio inicial de la Rena...,"{\n ""title"": ""Renault Trafic 2024 y Volkswa...",ef8e971e-0b52-411c-b789-67c9ec878fe7
3,103,# Accesorios del Volkswagen Polo Track\n\nLa c...,2,8.5,Accesorios del Volkswagen Polo Track,La calificación es alta debido a la relevancia...,La comunidad se centra en los accesorios diseñ...,[{'explanation': 'Las exploradoras delanteras ...,"{\n ""title"": ""Accesorios del Volkswagen Pol...",5469099c-98be-4fd4-8eda-a88ad8dfbea6
4,104,# Accesorios Interiores para Volkswagen: Tapet...,2,8.5,Accesorios Interiores para Volkswagen: Tapetes...,La calificación es alta debido a la relevancia...,La comunidad se centra en los accesorios inter...,[{'explanation': 'Los tapetes de goma termofor...,"{\n ""title"": ""Accesorios Interiores para Vo...",de50feb9-0045-4aa9-ae98-2d4dae4b04cf


In [6]:
# Load the text units table and run it through the indexer adapter to build
# `text_units`.
text_unit_table_path = f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet"
text_unit_df = pd.read_parquet(text_unit_table_path)
text_units = read_indexer_text_units(text_unit_df)

# Report the table size, then preview the first rows as the cell output.
print(f"Text unit records: {len(text_unit_df)}")
text_unit_df.head()

Text unit records: 51


Unnamed: 0,id,text,n_tokens,document_ids,entity_ids,relationship_ids
0,12ddd42462c1d1454f0a376c793f0d9d,Renault Master - Ficha Técnica Completa por Ve...,1200,[0e22c9d5a405886b189ac7189e8becc3],"[f2be3c7562ec4aabb5a634ee57cb9fc0, e2096bc79a7...","[8dbf0d8467804ee08014e63ff9e2e0b8, 05fdb7b933c..."
1,30c928ab3ef711168df0c97bbc65c986,de climatización**:\n- Aire acondicionado man...,826,[0e22c9d5a405886b189ac7189e8becc3],"[69fdcab168bb47e8bbb58cc7d5a4dd2b, a9e6a28c30a...","[1e613f33337a484cbb0871c3e4f55835, cd29e1ec5c4..."
2,ffe348a2a2d4e8ef0459f59a9f9cdbf0,Renault Stepway - Ficha Técnica Completa por V...,1200,[18eba02963db7eb36139693217e9292e],"[69fdcab168bb47e8bbb58cc7d5a4dd2b, 8896cf2a425...","[6eda75eced2941dda8ebb9024b2cd5ce, 91e4cc9628a..."
3,5febd6498cc6511572e3da693ada2627,queo traseros\n - Cámara de reversa\n - Se...,1111,[18eba02963db7eb36139693217e9292e],"[ce0cc800cbd849e4b317eef73fc6db68, a88e5786e70...","[68c28d3afa0448099cf45c57d7d6041d, 99d3823c036..."
4,7b62723bf425ea8c35c168755e4a37dc,"asistencias, y repuestos originales\n",11,[18eba02963db7eb36139693217e9292e],,


In [7]:
# The API key and both model names are read from the environment; indexing
# os.environ directly means a missing variable raises KeyError right here.
api_key = os.environ["GRAPHRAG_API_KEY"]
llm_model = os.environ["GRAPHRAG_LLM_MODEL"]
embedding_model = os.environ["GRAPHRAG_EMBEDDING_MODEL"]

# Tokenizer shared by the context builder and the search engine.
token_encoder = tiktoken.get_encoding("cl100k_base")

# Chat model client. api_type may be OpenaiApiType.OpenAI or
# OpenaiApiType.AzureOpenAI.
llm = ChatOpenAI(
    model=llm_model,
    api_key=api_key,
    api_type=OpenaiApiType.OpenAI,
    max_retries=20,
)

# Embedding client; the embedding model name is also passed as deployment_name.
text_embedder = OpenAIEmbedding(
    model=embedding_model,
    deployment_name=embedding_model,
    api_key=api_key,
    api_base=None,
    api_type=OpenaiApiType.OpenAI,
    max_retries=20,
)

In [8]:
# Assemble the mixed context builder from the loaded tables, the entity
# description embedding store, the embedder and the tokenizer.
#
# No covariates are passed in this notebook (the original `covariates=covariates`
# argument is commented out). Supply them only if covariates were produced
# during indexing; otherwise leave the argument unset / None.
#
# embedding_vectorstore_key: use EntityVectorStoreKey.TITLE instead if the
# vector store uses entity titles as ids.
context_builder = LocalSearchMixedContext(
    entities=entities,
    relationships=relationships,
    text_units=text_units,
    community_reports=reports,
    entity_text_embeddings=description_embedding_store,
    embedding_vectorstore_key=EntityVectorStoreKey.ID,
    text_embedder=text_embedder,
    token_encoder=token_encoder,
)

In [9]:
# Context-builder settings used for local search.
#
# text_unit_prop                        share of the context window given to related text units
# community_prop                        share of the context window given to community reports
#                                       (the rest goes to entities and relationships, so
#                                       text_unit_prop + community_prop should be <= 1)
# conversation_history_max_turns        maximum number of turns kept from the conversation history
# conversation_history_user_turns_only  if True, only user queries are kept from the history
# top_k_mapped_entities                 number of related entities retrieved from the entity
#                                       description embedding store
# top_k_relationships                   controls how many out-of-network relationships are pulled
#                                       into the context window
# include_entity_rank                   if True, the entity table in the context window carries the
#                                       entity rank (default entity rank = node degree)
# include_relationship_weight           if True, relationship weights appear in the context window
# include_community_rank                if True, community ranks appear in the context window
# return_candidate_context              if True, return dataframes with every candidate
#                                       entity/relationship/covariate record that could be relevant;
#                                       their "in_context" column marks the records that actually
#                                       made it into the context window
# embedding_vectorstore_key             EntityVectorStoreKey.ID here; switch to
#                                       EntityVectorStoreKey.TITLE if the vector store uses entity
#                                       titles as ids
# max_tokens                            token budget for the context window; tune it to the model's
#                                       limit (for a model with an 8k limit, 5000 could be a good
#                                       setting)
local_context_params = dict(
    text_unit_prop=0.5,
    community_prop=0.1,
    conversation_history_max_turns=5,
    conversation_history_user_turns_only=True,
    top_k_mapped_entities=10,
    top_k_relationships=10,
    include_entity_rank=True,
    include_relationship_weight=True,
    include_community_rank=False,
    return_candidate_context=False,
    embedding_vectorstore_key=EntityVectorStoreKey.ID,
    max_tokens=12_000,
)

# Generation settings forwarded to the LLM.
# max_tokens: adjust to the model's token limit (for a model with an 8k limit,
# something in the 1000-1500 range could be a good setting).
llm_params = dict(
    max_tokens=2_000,
    temperature=0.0,
)

In [10]:
# Wire the chat model, context builder and tokenizer into the local search engine.
# response_type is free-form text describing the desired response type and
# format; it can be anything, e.g. prioritized list, single paragraph,
# multiple paragraphs, multiple-page report.
search_engine = LocalSearch(
    llm=llm,
    llm_params=llm_params,
    context_builder=context_builder,
    context_builder_params=local_context_params,
    token_encoder=token_encoder,
    response_type="multiple paragraphs",
)

In [14]:
result = await search_engine.asearch("Dame información detallada sobre la volkswagen teramont")
print(result.response)

### Introducción al Volkswagen Teramont 2023

El Volkswagen Teramont 2023 es un SUV que se destaca por su diseño moderno y sus características avanzadas, enfocándose en la comodidad y seguridad de sus ocupantes. Este modelo ofrece una combinación de innovaciones tecnológicas, capacidades de motor robustas y opciones de personalización que lo posicionan como una opción competitiva en el mercado de SUVs [Data: Reports (20); Entities (737)].

### Características de Seguridad

El Teramont está equipado con múltiples características de seguridad que incluyen airbags frontales y laterales tipo cortina, sistema antibloqueo de ruedas (ABS), y control electrónico de estabilidad (ESC). Estas características son esenciales para garantizar la seguridad de los ocupantes en caso de un accidente. Además, cuenta con sensores de parqueo delanteros y traseros, lo que facilita el estacionamiento en entornos urbanos [Data: Reports (20); Entities (747, 516); Relationships (996, 510, 412)].

### Innovacione