In [4]:
import os
from pathlib import Path

import pandas as pd
import tiktoken

from graphrag.config.models.drift_search_config import DRIFTSearchConfig
from graphrag.query.indexer_adapters import (
    read_indexer_entities,
    read_indexer_relationships,
    read_indexer_report_embeddings,
    read_indexer_reports,
    read_indexer_text_units,
)
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.structured_search.drift_search.drift_context import (
    DRIFTSearchContextBuilder,
)
from graphrag.query.structured_search.drift_search.search import DRIFTSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore

In [7]:
INPUT_DIR = "../../../graphrag/output"
LANCEDB_URI = f"{INPUT_DIR}/lancedb"

COMMUNITY_REPORT_TABLE = "create_final_community_reports"
ENTITY_TABLE = "create_final_nodes"
ENTITY_EMBEDDING_TABLE = "create_final_entities"
RELATIONSHIP_TABLE = "create_final_relationships"
COVARIATE_TABLE = "create_final_covariates"
TEXT_UNIT_TABLE = "create_final_text_units"
COMMUNITY_LEVEL = 2

In [8]:
# read nodes table to get community and degree data
entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")

print(f"Entity df columns: {entity_df.columns}")

entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)

# load description embeddings to an in-memory lancedb vectorstore
# to connect to a remote db, specify url and port values.
description_embedding_store = LanceDBVectorStore(
    collection_name="default-entity-description",
)
description_embedding_store.connect(db_uri=LANCEDB_URI)

full_content_embedding_store = LanceDBVectorStore(
    collection_name="default-community-full_content",
)
full_content_embedding_store.connect(db_uri=LANCEDB_URI)

print(f"Entity count: {len(entity_df)}")
entity_df.head()

relationship_df = pd.read_parquet(f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet")
relationships = read_indexer_relationships(relationship_df)

print(f"Relationship count: {len(relationship_df)}")
relationship_df.head()

text_unit_df = pd.read_parquet(f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet")
text_units = read_indexer_text_units(text_unit_df)

print(f"Text unit records: {len(text_unit_df)}")
text_unit_df.head()

Entity df columns: Index(['id', 'human_readable_id', 'title', 'community', 'level', 'degree', 'x',
       'y'],
      dtype='object')
Entity count: 2886
Relationship count: 1177
Text unit records: 82


Unnamed: 0,id,human_readable_id,text,n_tokens,document_ids,entity_ids,relationship_ids
0,17a36093436dba59f919711728dfbc006ee25724db9748...,1,Audi S3 Sedán TFSI quattro - Ficha Técnica Com...,949,[0176fe4f3cb16e417b15b36f7797b9e0e16dbe4d25230...,"[45ecb081-6acc-4f25-a98a-57b99b026508, 4b1d0da...","[446b3a2a-a0cf-43d2-8ba6-6789ccb2420e, 66b5148..."
1,f7f4a3b50ed695879b783902a1fd09f49195069e75c93b...,2,**Brand**: *Renault* \n**Model**: *Kwid E-Tec...,1200,[080ce8e2d1d1092f59b9ac5ae06f19fdeccc69e8afa40...,"[ef26aaf9-ca56-4041-84e4-effc03cd5599, 17bf4fb...","[af855e08-945c-495b-9bf3-1c100529be97, d8afe12..."
2,3b30cd506f4492e0980a9a31afb415529ebf9939a23ec5...,3,**Funciones**: Modo ECO para maximizar la aut...,680,[080ce8e2d1d1092f59b9ac5ae06f19fdeccc69e8afa40...,"[6b25dd49-3caf-41eb-8a1b-c4bd0c39a9b1, c5c8af5...","[c5e80381-db28-45fc-8901-7ae3dcae3f1f, 3e02464..."
3,53fbf6c77a74e72173b94b3193ad7fd8eabd00d0a3f624...,4,Audi RS3 Sportback - Ficha Técnica Completa - ...,1076,[08c17ef4280f76430be642cfe6f21c49f6bc2585abb84...,"[8d873304-dc34-421d-8370-c1bc108f250c, c85c149...","[fe2d081a-4177-42ef-9bcb-237d64058600, 6af7892..."
4,e2f5289b98a949a0ec3c10b465a6b397625cdbe74c479f...,5,Audi Q3 Sportback 35 TFSI Ambition Plus - Fich...,1143,[12ebcd4306a0e57db19da9932457320cb891d07fc5edf...,"[4b1d0dac-3cb8-48eb-b9ad-8f1f2489e6e3, 5cd2b39...","[90a27bcc-f368-4f29-9fb7-f2fcaaed90fe, de3c0ca..."


In [9]:
api_key = os.environ["GRAPHRAG_API_KEY"]
llm_model = os.environ["GRAPHRAG_LLM_MODEL"]
embedding_model = os.environ["GRAPHRAG_EMBEDDING_MODEL"]

llm = ChatOpenAI(
    api_key=api_key,
    model=llm_model,
    api_type=OpenaiApiType.OpenAI,  # OpenaiApiType.OpenAI or OpenaiApiType.AzureOpenAI
    max_retries=20,
)

token_encoder = tiktoken.get_encoding("cl100k_base")

text_embedder = OpenAIEmbedding(
    api_key=api_key,
    api_base=None,
    api_type=OpenaiApiType.OpenAI,
    model=embedding_model,
    deployment_name=embedding_model,
    max_retries=20,
)

In [10]:
def read_community_reports(
    input_dir: str,
    community_report_table: str = COMMUNITY_REPORT_TABLE,
):
    """Embeds the full content of the community reports and saves the DataFrame with embeddings to the output path."""
    input_path = Path(input_dir) / f"{community_report_table}.parquet"
    return pd.read_parquet(input_path)


report_df = read_community_reports(INPUT_DIR)
reports = read_indexer_reports(
    report_df,
    entity_df,
    COMMUNITY_LEVEL,
    content_embedding_col="full_content_embeddings",
)
read_indexer_report_embeddings(reports, full_content_embedding_store)

In [11]:
drift_params = DRIFTSearchConfig(
    temperature=0,
    max_tokens=12_000,
    primer_folds=1,
    drift_k_followups=3,
    n_depth=3,
    n=1,
)

context_builder = DRIFTSearchContextBuilder(
    chat_llm=llm,
    text_embedder=text_embedder,
    entities=entities,
    relationships=relationships,
    reports=reports,
    entity_text_embeddings=description_embedding_store,
    text_units=text_units,
    token_encoder=token_encoder,
    config=drift_params,
)

search = DRIFTSearch(
    llm=llm, context_builder=context_builder, token_encoder=token_encoder
)

In [12]:
resp = await search.asearch("Que carros electricos tienes?")

  return bound(*args, **kwds)
                                               

In [13]:
resp.response

'### Modelos de Carros Eléctricos Disponibles\n\nActualmente, hay una variedad de modelos de vehículos eléctricos disponibles en el mercado, cada uno con características únicas que los hacen atractivos para diferentes tipos de consumidores. A continuación, se presentan algunos de los modelos destacados:\n\n#### Audi\n- **Audi Q8 e-tron 50 quattro Prestige**: Este modelo es completamente eléctrico, con una potencia máxima de 250 kW (340 HP) y una autonomía eléctrica de hasta 488 km según el ciclo WLTP. Ofrece una experiencia de conducción de lujo con características avanzadas de confort y seguridad [Data: Sources (19)].\n- **Audi Q8 Sportback e-tron 50 quattro Prestige**: Similar al Q8 e-tron, este modelo también es 100% eléctrico, con una autonomía ligeramente superior de 501 km. Está diseñado para aquellos que buscan un estilo deportivo sin comprometer la eficiencia [Data: Sources (27)].\n\n#### Volvo\n- **Volvo XC40 Recharge Ultimate P8**: Este SUV eléctrico ofrece una potencia de 40