In [1]:
import os

import pandas as pd
import tiktoken

from graphrag.query.indexer_adapters import read_indexer_entities, read_indexer_reports
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.structured_search.global_search.community_context import (
    GlobalCommunityContext,
)
from graphrag.query.structured_search.global_search.search import GlobalSearch

In [3]:
# Connection settings for the OpenAI-compatible chat endpoint.
# Each value can be overridden through an environment variable so that neither
# the endpoint nor a credential has to be edited into (and committed with) the
# notebook. The defaults are the values this notebook was originally run with.
BASE_URL = os.environ.get("GRAPHRAG_API_BASE", "http://localhost:8080/")
MODEL_NAME = os.environ.get("GRAPHRAG_LLM_MODEL", "gpt-4o")

llm = ChatOpenAI(
    # "api_key" is the original placeholder value; set GRAPHRAG_API_KEY when the
    # endpoint requires a real key.
    api_key=os.environ.get("GRAPHRAG_API_KEY", "api_key"),
    model=MODEL_NAME,
    api_base=BASE_URL,
    api_type=OpenaiApiType.OpenAI,  # OpenaiApiType.OpenAI or OpenaiApiType.AzureOpenAI
    max_retries=20,
)

# Tokenizer handed to the context builder and the search engine below.
# NOTE(review): tiktoken maps gpt-4o models to "o200k_base", not "cl100k_base";
# since the model is served from a local endpoint here, confirm which tokenizer
# actually matches it before relying on exact token budgets.
token_encoder = tiktoken.get_encoding("cl100k_base")

In [4]:
# parquet files generated from indexing pipeline
# The directory can be overridden with the GRAPHRAG_INPUT_DIR environment
# variable so the notebook is runnable on machines other than the author's;
# when the variable is unset the original absolute path is used.
INPUT_DIR = os.environ.get(
    "GRAPHRAG_INPUT_DIR",
    "/home/cip/ce/ix05ogym/Majid/LLM/GraphRag/elec_graph/output",
)
# Base names (without the ".parquet" extension) of the tables read below.
COMMUNITY_REPORT_TABLE = "create_final_community_reports"
ENTITY_TABLE = "create_final_nodes"
ENTITY_EMBEDDING_TABLE = "create_final_entities"

# community level in the Leiden community hierarchy from which we will load the community reports
# higher value means we use reports from more fine-grained communities (at the cost of higher computation cost)
COMMUNITY_LEVEL = 2

In [5]:
# Read the three indexer output tables (nodes, community reports, entities),
# in that order, from INPUT_DIR.
entity_df, report_df, entity_embedding_df = (
    pd.read_parquet(f"{INPUT_DIR}/{table_name}.parquet")
    for table_name in (ENTITY_TABLE, COMMUNITY_REPORT_TABLE, ENTITY_EMBEDDING_TABLE)
)

# Turn the raw tables into the report / entity objects used by the query
# engine, passing COMMUNITY_LEVEL to both readers.
reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)
entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)

# Compare how many reports exist in total with how many the reader returned.
total_report_count = len(report_df)
filtered_report_count = len(reports)
print(f"Total report count: {total_report_count}")
print(
    f"Report count after filtering by community level {COMMUNITY_LEVEL}: {filtered_report_count}"
)
report_df.head()

Total report count: 73
Report count after filtering by community level 2: 57


Unnamed: 0,community,full_content,level,rank,title,rank_explanation,summary,findings,full_content_json,id
0,58,# Soldering and Electronics Assembly\n\nThis c...,2,7.0,Soldering and Electronics Assembly,The impact severity rating is high due to the ...,This community revolves around the process of ...,[{'explanation': 'Soldering is the central ent...,"{\n ""title"": ""Soldering and Electronics Ass...",b35de978-ca01-44d7-a012-7b6526ae3412
1,59,# Flux and Solderability\n\nThis community rev...,2,3.0,Flux and Solderability,The impact severity rating is low due to the l...,This community revolves around the concept of ...,[{'explanation': 'Flux is a key entity in this...,"{\n ""title"": ""Flux and Solderability"",\n ...",287df284-3c47-4e78-8024-7bfed14af4ee
2,60,# Assembly and its Associated Faults\n\nThis c...,2,7.0,Assembly and its Associated Faults,The impact severity rating is high due to the ...,This community revolves around the concept of ...,[{'explanation': 'Assembly is a fundamental pr...,"{\n ""title"": ""Assembly and its Associated F...",797c3220-46e5-4c6f-9a3a-83da48391eb3
3,61,# Component Assembly and Manufacturing\n\nThis...,2,7.0,Component Assembly and Manufacturing,The impact severity rating is high due to the ...,This community focuses on the assembly and man...,[{'explanation': 'Components are the fundament...,"{\n ""title"": ""Component Assembly and Manufa...",65dff29b-096d-4a55-aaef-66de20e2de64
4,62,# Electrical Connections and Circuits\n\nThis ...,2,3.0,Electrical Connections and Circuits,The impact severity rating is low due to the t...,This community revolves around the concept of ...,[{'explanation': 'Soldering is a crucial techn...,"{\n ""title"": ""Electrical Connections and Ci...",d294c6d5-ba91-42de-a01e-1aa6c4b21978


In [6]:
# Context builder for global search, assembled from the objects loaded above.
context_builder = GlobalCommunityContext(
    token_encoder=token_encoder,
    # Pass entities=None instead to skip community-weight based ranking.
    entities=entities,
    community_reports=reports,
)

In [7]:
# Options forwarded to the context builder.
context_builder_params = dict(
    # False -> full community reports; True -> short community summaries.
    use_community_summary=False,
    shuffle_data=True,
    include_community_rank=True,
    min_community_rank=0,
    community_rank_name="rank",
    include_community_weight=True,
    community_weight_name="occurrence weight",
    normalize_community_weight=True,
    # Tune to the model's token limit (e.g. around 5000 for an 8k-limit model).
    max_tokens=12_000,
    context_name="Reports",
)

# LLM call parameters for the map step.
map_llm_params = dict(
    max_tokens=1000,
    temperature=0.0,
    response_format=dict(type="json_object"),
)

# LLM call parameters for the reduce step.
reduce_llm_params = dict(
    # Tune to the model's token limit (e.g. 1000-1500 for an 8k-limit model).
    max_tokens=2000,
    temperature=0.0,
)

In [8]:
# Global search engine wired to the LLM, tokenizer and context builder above.
search_engine = GlobalSearch(
    # Core collaborators.
    llm=llm,
    token_encoder=token_encoder,
    context_builder=context_builder,
    context_builder_params=context_builder_params,
    # Per-step LLM call parameters.
    map_llm_params=map_llm_params,
    reduce_llm_params=reduce_llm_params,
    # Tune to the model's token limit (e.g. around 5000 for an 8k-limit model).
    max_data_tokens=12_000,
    # True adds an instruction encouraging the LLM to use general knowledge;
    # may increase hallucinations but can be useful in some cases.
    allow_general_knowledge=False,
    # Set to False if the LLM does not support JSON mode.
    json_mode=True,
    concurrent_coroutines=32,
    # Free-form description of the desired answer format, e.g. prioritized list,
    # single paragraph, multiple paragraphs, multiple-page report.
    response_type="multiple paragraphs",
)

In [12]:
result = await search_engine.asearch(
    "describe soldering metrics"
)

print(result.response)

Soldering is a crucial process in electronics manufacturing, directly impacting the reliability and performance of electronic circuits and assemblies. [Data: Reports (10, 71, 58, 38)] A high-quality soldered joint ensures a strong and reliable connection, meeting its intended purpose. [Data: Reports (10, 71, 58, 38)] This is achieved by ensuring the soldered joints are properly formed, free from defects, and meet the required specifications. [Data: Reports (10, 71, 58, 38)]

**Solderability** is a key metric in soldering, referring to the ability of a metal surface to be wetted by solder. [Data: Reports (71)] Several factors can affect solderability, including the cleanliness of the surface, the type of metal, the presence of oxides and contaminants, and the soldering process itself. [Data: Reports (71)] Oxidation, in particular, can hinder solderability by forming a layer of oxide on the surface of metals, creating a barrier between the solder and the metal surface. [Data: Reports (71

In [10]:
# Show the "reports" entry of the context data attached to the search result,
# i.e. the data that was used to build the context for the LLM responses.
reports_context = result.context_data["reports"]
reports_context

Unnamed: 0,id,title,occurrence weight,content,rank
0,28,Printed Circuit Board Manufacturing,0.894737,# Printed Circuit Board Manufacturing\n\nThe c...,7.0
1,10,Soldering Quality and Cost in Electronics Manu...,0.526316,# Soldering Quality and Cost in Electronics Ma...,7.0
2,68,SC Soldering and Surface Mounting Assembly,0.368421,# SC Soldering and Surface Mounting Assembly\n...,3.0
3,48,"Copper, Tin, and Board in Electronics Assembly",0.263158,"# Copper, Tin, and Board in Electronics Assemb...",7.0
4,49,Cleaning and Soldering in Electronics Assembly,0.263158,# Cleaning and Soldering in Electronics Assemb...,7.0
5,53,SI-GC-CU-13 Laminate and its Properties,0.263158,# SI-GC-CU-13 Laminate and its Properties\n\nT...,3.0
6,20,Hand Soldering and Soldering Iron,0.263158,# Hand Soldering and Soldering Iron\n\nThis co...,2.0
7,15,Electronics Assemblies and Soldering Processes,0.210526,# Electronics Assemblies and Soldering Process...,7.0
8,65,Soldering Process and Electronics Manufacturing,0.210526,# Soldering Process and Electronics Manufactur...,6.0
9,63,"Dip, Drag, and Wave Soldering Processes",0.210526,"# Dip, Drag, and Wave Soldering Processes\n\nT...",3.0


In [11]:
# Report how many LLM calls the search made and the prompt token count it recorded.
llm_call_count, prompt_token_count = result.llm_calls, result.prompt_tokens
print(f"LLM calls: {llm_call_count}. LLM tokens: {prompt_token_count}")

LLM calls: 3. LLM tokens: 29212
