In [1]:
import os

import pandas as pd
import tiktoken

from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
    read_indexer_covariates,
    read_indexer_entities,
    read_indexer_relationships,
    read_indexer_reports,
    read_indexer_text_units,
)
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import (
    LocalSearchMixedContext,
)
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Root folder of the indexing output, and the LanceDB location inside it.
INPUT_DIR = "../../../graphrag_reduced/output"
LANCEDB_URI = INPUT_DIR + "/lancedb"

# Base names (without the ".parquet" suffix) of the index tables.
ENTITY_TABLE = "entities"
COMMUNITY_TABLE = "communities"
COMMUNITY_REPORT_TABLE = "community_reports"
RELATIONSHIP_TABLE = "relationships"
COVARIATE_TABLE = "covariates"
TEXT_UNIT_TABLE = "text_units"

# Community level handed to the entity and report readers.
COMMUNITY_LEVEL = 2

In [17]:
# Load the entity and community tables, then build the entity objects
# for the configured community level.
entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
community_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet")

entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)

# Preview only a few records; echoing the whole list floods the cell output.
print(f"Entities loaded: {len(entities)}")
entities[:3]

[Entity(id='02f945dc-6c34-4cae-ad49-1429f6b32f58', short_id='54', title='TIPTRONIC', type='EVENT', description='Tiptronic is a type of automatic transmission used in the Audi Q7, allowing for manual gear selection.', description_embedding=None, name_embedding=None, community_ids=['20', '5', '15'], text_unit_ids=['02ec64d44ac9d2832dc81d0724cafee20a68be595587bc7ae2e2508cc8e434ae339cfaa5b2f7c59abf1ca3a04def0a60d0e2a36bd889d82cede4b5a1f13be8e5'], rank=1, attributes=None),
 Entity(id='045a885f-e334-48f6-be68-ad6bcc98d35a', short_id='24', title='SISTEMA ANTIRROBO', type='EVENT', description='El "Sistema Antirrobo" se refiere a un sistema de inmovilizador electrónico presente en varios modelos de Audi, diseñado para prevenir el acceso no autorizado y proteger contra el robo de vehículos. Este sistema se encuentra en el Audi Q8, donde impide el acceso no autorizado, así como en el Audi RS Q3, que también cuenta con un sistema antirrobo similar. Además, el Audi S3 Sedán TFSI quattro está equipa

In [15]:
# Point a LanceDB vector store at the entity-description collection and
# connect it to the database located at LANCEDB_URI.
description_embedding_store = LanceDBVectorStore(collection_name="default-entity-description")
description_embedding_store.connect(db_uri=LANCEDB_URI)

# Report the size of the entity table and preview its first rows.
print(f"Entity count: {len(entity_df)}")
entity_df.head()

Entity count: 278


Unnamed: 0,id,human_readable_id,title,type,description,text_unit_ids,frequency,degree,x,y
0,47aecdb7-c51a-4e15-b587-9b5490357ef5,0,AUDI S3 SEDÁN TFSI QUATTRO,ORGANIZATION,The Audi S3 Sedán TFSI quattro is a high-perfo...,[17a36093436dba59f919711728dfbc006ee25724db974...,1,31,0.0,0.0
1,1f256b1d-e759-4a07-93ca-eb7027a47fc5,1,AUDI,ORGANIZATION,Audi es un fabricante de automóviles alemán re...,[17a36093436dba59f919711728dfbc006ee25724db974...,16,34,0.0,0.0
2,3b81d717-576f-4e17-b80a-9d473391348e,2,MOTOR Y TRANSMISIÓN,EVENT,MOTOR Y TRANSMISIÓN se refiere a las especific...,[17a36093436dba59f919711728dfbc006ee25724db974...,3,3,0.0,0.0
3,6b1012e6-143f-455a-8a4a-2fc0cddf0177,3,DIMENSIONES Y CAPACIDADES,EVENT,"Las ""DIMENSIONES Y CAPACIDADES"" abarcan una va...",[17a36093436dba59f919711728dfbc006ee25724db974...,12,12,0.0,0.0
4,aaaf1aed-9b43-4dea-95d2-69520cd00f7a,4,SEGURIDAD,EVENT,"La entidad ""SEGURIDAD"" abarca una amplia gama ...",[17a36093436dba59f919711728dfbc006ee25724db974...,12,12,0.0,0.0


In [6]:
# Read the relationships table and convert its rows into relationship objects.
relationship_path = f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet"
relationship_df = pd.read_parquet(relationship_path)
relationships = read_indexer_relationships(relationship_df)

# Show the row count, then preview the first rows of the table.
print(f"Relationship count: {len(relationship_df)}")
relationship_df.head()

Relationship count: 294


Unnamed: 0,id,human_readable_id,source,target,description,weight,combined_degree,text_unit_ids
0,784bac1b-50b7-4184-80ad-d664f870a63f,0,AUDI S3 SEDÁN TFSI QUATTRO,AUDI,The Audi S3 Sedán TFSI quattro is a model prod...,8.0,65,[17a36093436dba59f919711728dfbc006ee25724db974...
1,a5724801-0fb1-4c06-8f32-b896bfa93fbc,1,AUDI S3 SEDÁN TFSI QUATTRO,MOTOR Y TRANSMISIÓN,The specifications of the engine and transmiss...,7.0,34,[17a36093436dba59f919711728dfbc006ee25724db974...
2,2daae577-7012-4698-a2dd-ee0ae1ff4c82,2,AUDI S3 SEDÁN TFSI QUATTRO,DIMENSIONES Y CAPACIDADES,The dimensions and capacities are essential at...,7.0,43,[17a36093436dba59f919711728dfbc006ee25724db974...
3,c340a498-ee82-43cc-9cbf-b1819675b799,3,AUDI S3 SEDÁN TFSI QUATTRO,SEGURIDAD,Safety features are a significant aspect of th...,7.0,43,[17a36093436dba59f919711728dfbc006ee25724db974...
4,07e5b244-4511-47fd-a8bb-335d80aadc77,4,AUDI S3 SEDÁN TFSI QUATTRO,CONFORT Y EQUIPAMIENTO,Comfort and equipment details enhance the appe...,7.0,43,[17a36093436dba59f919711728dfbc006ee25724db974...


In [22]:
# Read the community reports table and build the report objects for the
# configured community level (uses community_df loaded earlier).
report_path = f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet"
report_df = pd.read_parquet(report_path)
reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)

# Show the row count, then preview the first rows of the table.
print(f"Report records: {len(report_df)}")
report_df.head()

Report records: 23


Unnamed: 0,id,human_readable_id,community,level,parent,children,title,summary,full_content,rank,rating_explanation,findings,full_content_json,period,size
0,8e3422017300400e9e96f0ce7908c1d2,19,19,2,15,[],Audi Vehicle Features and Specifications,The community focuses on various Audi vehicle ...,# Audi Vehicle Features and Specifications\n\n...,7.5,The impact severity rating is high due to the ...,[{'explanation': 'The Audi Q7 55 TFSI quattro ...,"{\n ""title"": ""Audi Vehicle Features and Spe...",2025-05-01,7
1,a31e7f901c104c238a0598adfc487d38,20,20,2,15,[],Audi Q7 55 TFSI Quattro MHEV Prestige Community,The community centers around the Audi Q7 55 TF...,# Audi Q7 55 TFSI Quattro MHEV Prestige Commun...,6.5,The impact severity rating is moderate to high...,[{'explanation': 'The Audi Q7 55 TFSI Quattro ...,"{\n ""title"": ""Audi Q7 55 TFSI Quattro MHEV ...",2025-05-01,3
2,82148fd43c5e45f8af8151a86ba7aa72,21,21,2,17,[],Audi Q8 e-tron Community Overview,The community centers around the Audi Q8 e-tro...,# Audi Q8 e-tron Community Overview\n\nThe com...,7.5,The impact severity rating is high due to the ...,[{'explanation': 'The Audi Q8 e-tron 50 quattr...,"{\n ""title"": ""Audi Q8 e-tron Community Over...",2025-05-01,6
3,2c62cbe634114fcf886fab0d44b10670,22,22,2,17,[],Audi Q8 Sportback e-tron Community,The community centers around the Audi Q8 Sport...,# Audi Q8 Sportback e-tron Community\n\nThe co...,7.5,The impact severity rating is high due to the ...,[{'explanation': 'The Audi Q8 Sportback e-tron...,"{\n ""title"": ""Audi Q8 Sportback e-tron Comm...",2025-05-01,6
4,ea97f62c8ce64289984fab316c7e80e3,8,8,1,1,[],Volvo XC60 Plus T8 and Associated Technologies,The community centers around the Volvo XC60 Pl...,# Volvo XC60 Plus T8 and Associated Technologi...,7.5,The impact severity rating is high due to the ...,[{'explanation': 'The Volvo XC60 Plus T8 is a ...,"{\n ""title"": ""Volvo XC60 Plus T8 and Associ...",2025-05-01,10


In [8]:
# Read the text units table and convert its rows into text unit objects.
text_unit_path = f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet"
text_unit_df = pd.read_parquet(text_unit_path)
text_units = read_indexer_text_units(text_unit_df)

# Show the row count, then preview the first rows of the table.
print(f"Text unit records: {len(text_unit_df)}")
text_unit_df.head()

Text unit records: 24


Unnamed: 0,id,human_readable_id,text,n_tokens,document_ids,entity_ids,relationship_ids,covariate_ids
0,17a36093436dba59f919711728dfbc006ee25724db9748...,1,Audi S3 Sedán TFSI quattro - Ficha Técnica Com...,949,[0176fe4f3cb16e417b15b36f7797b9e0e16dbe4d25230...,"[47aecdb7-c51a-4e15-b587-9b5490357ef5, 1f256b1...","[784bac1b-50b7-4184-80ad-d664f870a63f, a572480...",[]
1,53fbf6c77a74e72173b94b3193ad7fd8eabd00d0a3f624...,2,Audi RS3 Sportback - Ficha Técnica Completa - ...,1076,[08c17ef4280f76430be642cfe6f21c49f6bc2585abb84...,"[1f256b1d-e759-4a07-93ca-eb7027a47fc5, 6b1012e...","[69d22c28-0fa8-48e5-9160-2b86be02cd24, 2ed616e...",[]
2,e2f5289b98a949a0ec3c10b465a6b397625cdbe74c479f...,3,Audi Q3 Sportback 35 TFSI Ambition Plus - Fich...,1143,[12ebcd4306a0e57db19da9932457320cb891d07fc5edf...,"[1f256b1d-e759-4a07-93ca-eb7027a47fc5, 6b1012e...","[08cbfd0e-3753-4838-9e21-12d653153246, 2e37230...",[]
3,02ec64d44ac9d2832dc81d0724cafee20a68be595587bc...,4,Audi Q7 55 TFSI quattro MHEV Prestige - Ficha ...,1200,[1f38568f11c47995c3f238adbadeb3d1752dfde264174...,"[1f256b1d-e759-4a07-93ca-eb7027a47fc5, 6b1012e...","[043cf794-d2fd-4b7c-836a-b95d9e6b410e, 8dbe7fe...",[]
4,3b07297565e6ea94a094430d4b05fa2c010cfd508a758c...,5,""" en diseño de 10 brazos en estrella\n- Llanta...",175,[1f38568f11c47995c3f238adbadeb3d1752dfde264174...,"[07718a7b-d6c6-415e-a5a4-8158b3db2aa8, 48a6027...",,[]


In [9]:
from graphrag.config.enums import ModelType
from graphrag.config.models.language_model_config import LanguageModelConfig
from graphrag.language_model.manager import ModelManager

# Credentials and model names are read from the environment; a missing
# variable raises KeyError right here.
api_key = os.environ["GRAPHRAG_API_KEY"]
llm_model = os.environ["GRAPHRAG_LLM_MODEL"]
embedding_model = os.environ["GRAPHRAG_EMBEDDING_MODEL"]

# Chat model used to generate the search answer.
chat_config = LanguageModelConfig(
    api_key=api_key,
    type=ModelType.OpenAIChat,
    model=llm_model,
    max_retries=20,
)
chat_model = ModelManager().get_or_create_chat_model(
    name="local_search",
    model_type=ModelType.OpenAIChat,
    config=chat_config,
)

# tiktoken raises KeyError for model names it cannot map to an encoding
# (e.g. custom deployment names); fall back to cl100k_base in that case
# instead of aborting the notebook.
try:
    token_encoder = tiktoken.encoding_for_model(llm_model)
except KeyError:
    token_encoder = tiktoken.get_encoding("cl100k_base")

# Embedding model handed to the context builder as its text embedder.
embedding_config = LanguageModelConfig(
    api_key=api_key,
    type=ModelType.OpenAIEmbedding,
    model=embedding_model,
    max_retries=20,
)

text_embedder = ModelManager().get_or_create_embedding_model(
    name="local_search_embedding",
    model_type=ModelType.OpenAIEmbedding,
    config=embedding_config,
)

In [10]:
# Assemble the local-search context builder from the objects loaded above.
context_builder = LocalSearchMixedContext(
    entities=entities,
    relationships=relationships,
    text_units=text_units,
    community_reports=reports,
    # Leave as None when covariates were not produced during indexing.
    covariates=None,
    entity_text_embeddings=description_embedding_store,
    # Switch to EntityVectorStoreKey.TITLE if the vector store uses entity titles as ids.
    embedding_vectorstore_key=EntityVectorStoreKey.ID,
    text_embedder=text_embedder,
    token_encoder=token_encoder,
)

In [11]:
# Settings forwarded to the context builder. text_unit_prop + community_prop
# should be <= 1; whatever proportion remains is dedicated to entities and
# relationships.
local_context_params = dict(
    # Proportion of the context window dedicated to related text units.
    text_unit_prop=0.5,
    # Proportion of the context window dedicated to community reports.
    community_prop=0.1,
    # Maximum number of turns to include in the conversation history.
    conversation_history_max_turns=5,
    # If True, only user queries are included in the conversation history.
    conversation_history_user_turns_only=True,
    # Number of related entities to retrieve from the entity description embedding store.
    top_k_mapped_entities=10,
    # Controls the number of out-of-network relationships pulled into the context window.
    top_k_relationships=10,
    # If True, include the entity rank in the entity table (default entity rank = node degree).
    include_entity_rank=True,
    # If True, include the relationship weight in the context window.
    include_relationship_weight=True,
    # If True, include the community rank in the context window.
    include_community_rank=False,
    # If True, return dataframes of all candidate entity/relationship/covariate
    # records that could be relevant; their "in_context" column marks the records
    # that actually made it into the context window.
    return_candidate_context=False,
    # Switch to EntityVectorStoreKey.TITLE if the vector store uses entity titles as ids.
    embedding_vectorstore_key=EntityVectorStoreKey.ID,
    # Maximum number of tokens for the context window; adjust to your model's
    # token limit (for a model with an 8k limit, 5000 could be a good setting).
    max_tokens=12_000,
)

# Generation settings passed to the search engine as model_params.
model_params = dict(
    # Adjust to your model's token limit (for a model with an 8k limit,
    # 1000-1500 could be a good setting).
    max_tokens=2_000,
    temperature=0.0,
)

In [12]:
# Wire the chat model, context builder and their settings into the search engine.
# response_type is free-form text describing the response type and format; it can
# be anything, e.g. prioritized list, single paragraph, multiple paragraphs,
# multiple-page report.
search_engine = LocalSearch(
    context_builder=context_builder,
    context_builder_params=local_context_params,
    model=chat_model,
    model_params=model_params,
    token_encoder=token_encoder,
    response_type="multiple paragraphs",
)

In [14]:
result = await search_engine.search("Tell me about all the audi models you have avilable")
print(result.response)

### Overview of Audi Models

Audi is a renowned German automobile manufacturer known for its luxury vehicles and advanced automotive technology. The brand offers a diverse range of models, including electric vehicles, SUVs, and high-performance cars, catering to various market segments [Data: Entities (1)].

### Luxury SUVs

Audi's lineup of luxury SUVs includes models like the **Audi Q7 55 TFSI quattro MHEV Prestige** and the **Audi Q5 45 TFSI quattro Advanced**. The Q7 is a flagship model featuring advanced hybrid technology, high performance, and premium comfort attributes, making it a standout choice for luxury SUV enthusiasts [Data: Entities (52); Reports (5)]. The Q5, on the other hand, is known for its advanced technology and performance specifications, offering a blend of luxury and practicality [Data: Entities (160)].

### High-Performance Models

For those seeking high-performance vehicles, Audi offers models such as the **Audi RS3 Sedán** and **Audi S3 Sedán TFSI quattro**. 