# Visualizing the knowledge graph with `yfiles-jupyter-graphs`

This notebook is a partial copy of [local_search.ipynb](../../local_search.ipynb) that shows how to use `yfiles-jupyter-graphs` to add interactive graph visualizations of the parquet files  and how to visualize the result context of `graphrag` queries (see at the end of this notebook).

In [1]:
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License.

In [25]:
import os

import pandas as pd
import numpy as np
import tiktoken

from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
    read_indexer_covariates,
    read_indexer_entities,
    read_indexer_relationships,
    read_indexer_reports,
    read_indexer_text_units,
)
from graphrag.query.input.loaders.dfs import (
    store_entity_semantic_embeddings,
)
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding, OpenAICompatibleOllamaEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.structured_search.local_search.mixed_context import (
    LocalSearchMixedContext,
)
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore

## Local Search Example

Local search method generates answers by combining relevant data from the AI-extracted knowledge-graph with text chunks of the raw documents. This method is suitable for questions that require an understanding of specific entities mentioned in the documents (e.g. What are the healing properties of chamomile?).

### Load text units and graph data tables as context for local search

- In this test we first load indexing outputs from parquet files to dataframes, then convert these dataframes into collections of data objects aligning with the knowledge model.

### Load tables to dataframes

In [3]:
INPUT_DIR = "/Users/luwi/Documents/Code/microsoft_graphrag_local/ragdirs/753aa9b3-12c7-44a1-910d-b5a989455725/output"
LANCEDB_URI = f"{INPUT_DIR}/lancedb"

COMMUNITY_REPORT_TABLE = "create_final_community_reports"
ENTITY_TABLE = "create_final_nodes"
ENTITY_EMBEDDING_TABLE = "create_final_entities"
RELATIONSHIP_TABLE = "create_final_relationships"
COVARIATE_TABLE = "create_final_covariates"
TEXT_UNIT_TABLE = "create_final_text_units"
COMMUNITY_LEVEL = 2

#### Read entities

In [4]:
# read nodes table to get community and degree data
entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")

In [21]:
entity_embedding_df.head()

Unnamed: 0,id,name,type,description,human_readable_id,graph_embedding,text_unit_ids,description_embedding
0,c2703426e45341189ac49d4a938f17e8,DRAKALON,GEO,Here is a comprehensive summary of the data pr...,0,,"[23afb11229f218353d4be66e9a255f5c, 825747b9333...","[0.1213243305683136, 1.4661195278167725, -3.40..."
1,0bcb760874124140be8d00350b033ac9,GOVERNOR ISKA TORVATH,PERSON,"Governor of Drakalon, which has rising tension...",1,,[23afb11229f218353d4be66e9a255f5c],"[-0.08680564165115356, 1.3107608556747437, -3...."
2,6f4f0797e7804646a61c643418140c26,ZYNARA PRIME,,Here is a comprehensive summary of the data pr...,2,,"[23afb11229f218353d4be66e9a255f5c, baaa785ec1c...","[-0.018054157495498657, 1.880157709121704, -3...."
3,5b0218e7f1b84ae3b833dd550afc511c,ISKA TORVATH,PERSON,Iska Torvath is the governor of Drakalon,3,,[23afb11229f218353d4be66e9a255f5c],"[-0.2125309705734253, 0.9322000741958618, -3.3..."
4,88bcf98a60074c84a0977a793510dda0,TORVATH ISKA,PERSON,Iska Torvath is the governor of Drakalon,4,,[23afb11229f218353d4be66e9a255f5c],"[-0.2500615119934082, 0.8710544109344482, -3.3..."


In [28]:
np.asarray([v.tolist() for v in entity_embedding_df.description_embedding.values]).shape

(144, 768)

In [33]:
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
import numpy as np
from ast import literal_eval

# Load the embeddings



# Convert to a list of lists of floats
matrix = np.asarray([v.tolist() for v in entity_embedding_df.description_embedding.values])

# Create a t-SNE model and transform the data
tsne = TSNE(n_components=2, perplexity=15, random_state=42, init='random', learning_rate=200)
tsne.fit(matrix)
vis_dims = tsne.embedding_(matrix)
vis_dims.shape
colors = ["red", "darkorange", "gold", "turquoise", "darkgreen"]
x = [x for x,y in vis_dims]
y = [y for x,y in vis_dims]

color_indices = entity_embedding_df.name.apply(lambda name: 1 if name not in ["DRAKALON", "GOVERNOR ISKA TORVATH"] else 2)

colormap = matplotlib.colors.ListedColormap(colors)
plt.scatter(x, y, c=color_indices, cmap=colormap, alpha=0.3)
# for score in [0,1,2,3,4]:
#     avg_x = np.array(x)[df.Score-1==score].mean()
#     avg_y = np.array(y)[df.Score-1==score].mean()
#     color = colors[score]
#     plt.scatter(avg_x, avg_y, marker='x', color=color, s=100)

plt.title("Amazon ratings visualized in language using t-SNE")

TypeError: 'numpy.ndarray' object is not callable

In [5]:
entity_df.head()

Unnamed: 0,level,title,type,description,source_id,entity_type,degree,human_readable_id,id,size,graph_embedding,community,top_level_node_id,x,y
0,0,DRAKALON,GEO,Here is a comprehensive summary of the data pr...,"23afb11229f218353d4be66e9a255f5c,825747b9333a4...",GEO,5,0,c2703426e45341189ac49d4a938f17e8,5,,,c2703426e45341189ac49d4a938f17e8,0,0
1,0,GOVERNOR ISKA TORVATH,PERSON,"Governor of Drakalon, which has rising tension...",23afb11229f218353d4be66e9a255f5c,PERSON,2,1,0bcb760874124140be8d00350b033ac9,2,,,0bcb760874124140be8d00350b033ac9,0,0
2,0,ZYNARA PRIME,,Here is a comprehensive summary of the data pr...,"23afb11229f218353d4be66e9a255f5c,baaa785ec1c67...",,5,2,6f4f0797e7804646a61c643418140c26,5,,,6f4f0797e7804646a61c643418140c26,0,0
3,0,ISKA TORVATH,PERSON,Iska Torvath is the governor of Drakalon,23afb11229f218353d4be66e9a255f5c,,0,3,5b0218e7f1b84ae3b833dd550afc511c,0,,,5b0218e7f1b84ae3b833dd550afc511c,0,0
4,0,TORVATH ISKA,PERSON,Iska Torvath is the governor of Drakalon,23afb11229f218353d4be66e9a255f5c,,0,4,88bcf98a60074c84a0977a793510dda0,0,,,88bcf98a60074c84a0977a793510dda0,0,0


#### Read relationships

In [6]:
relationship_df = pd.read_parquet(f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet")
relationships = read_indexer_relationships(relationship_df)

# Visualizing nodes and relationships with `yfiles-jupyter-graphs`

`yfiles-jupyter-graphs` is a graph visualization extension that provides interactive and customizable visualizations for structured node and relationship data.

In this case, we use it to provide an interactive visualization for the knowledge graph of the [local_search.ipynb](../../local_search.ipynb) sample by passing node and relationship lists converted from the given parquet files. The requirements for the input data is an `id` attribute for the nodes and `start`/`end` properties for the relationships that correspond to the node ids. Additional attributes can be added in the `properties` of each node/relationship dict:

In [7]:
# %pip install yfiles_jupyter_graphs --quiet
from yfiles_jupyter_graphs import GraphWidget


# converts the entities dataframe to a list of dicts for yfiles-jupyter-graphs
def convert_entities_to_dicts(df):
    """Convert the entities dataframe to a list of dicts for yfiles-jupyter-graphs."""
    nodes_dict = {}
    for _, row in df.iterrows():
        # Create a dictionary for each row and collect unique nodes
        node_id = row["title"]
        if node_id not in nodes_dict:
            nodes_dict[node_id] = {
                "id": node_id,
                "properties": row.to_dict(),
            }
    return list(nodes_dict.values())


# converts the relationships dataframe to a list of dicts for yfiles-jupyter-graphs
def convert_relationships_to_dicts(df):
    """Convert the relationships dataframe to a list of dicts for yfiles-jupyter-graphs."""
    relationships = []
    for _, row in df.iterrows():
        # Create a dictionary for each row
        relationships.append({
            "start": row["source"],
            "end": row["target"],
            "properties": row.to_dict(),
        })
    return relationships


w = GraphWidget()
w.directed = True
w.nodes = convert_entities_to_dicts(entity_df)
w.edges = convert_relationships_to_dicts(relationship_df)

## Configure data-driven visualization

The additional properties can be used to configure the visualization for different use cases.

In [8]:
# show title on the node
w.node_label_mapping = "title"


# map community to a color
def community_to_color(community):
    """Map a community to a color."""
    colors = [
        "crimson",
        "darkorange",
        "indigo",
        "cornflowerblue",
        "cyan",
        "teal",
        "green",
    ]
    return (
        colors[int(community) % len(colors)] if community is not None else "lightgray"
    )


def edge_to_source_community(edge):
    """Get the community of the source node of an edge."""
    source_node = next(
        (entry for entry in w.nodes if entry["properties"]["title"] == edge["start"]),
        None,
    )
    source_node_community = source_node["properties"]["community"]
    return source_node_community if source_node_community is not None else None


w.node_color_mapping = lambda node: community_to_color(node["properties"]["community"])
w.edge_color_mapping = lambda edge: community_to_color(edge_to_source_community(edge))
# map size data to a reasonable factor
w.node_scale_factor_mapping = lambda node: 0.5 + node["properties"]["size"] * 1.5 / 20
# use weight for edge thickness
w.edge_thickness_factor_mapping = "weight"

## Automatic layouts

The widget provides different automatic layouts that serve different purposes: `Circular`, `Hierarchic`, `Organic (interactiv or static)`, `Orthogonal`, `Radial`, `Tree`, `Geo-spatial`.

For the knowledge graph, this sample uses the `Circular` layout, though `Hierarchic` or `Organic` are also suitable choices.

In [9]:
# Use the circular layout for this visualization. For larger graphs, the default organic layout is often preferrable.
w.circular_layout()

## Display the graph

In [10]:
display(w)

GraphWidget(layout=Layout(height='800px', width='100%'))

# Visualizing the result context of `graphrag` queries

The result context of `graphrag` queries allow to inspect the context graph of the request. This data can similarly be visualized as graph with `yfiles-jupyter-graphs`.

## Making the request

The following cell recreates the sample queries from [local_search.ipynb](../../local_search.ipynb).

In [34]:
text_embedder = OpenAICompatibleOllamaEmbedding(
    api_key="olama",
    api_base="http://localhost:11434",
    api_type=OpenaiApiType.OpenAI,
    model="nomic-embed-text",
    deployment_name="nomic-embed-text",
    max_retries=20,
)
text_embedder.embed("Hello")


[0.01793399280623597,
 -0.005854579889000267,
 -0.17534418004648694,
 -0.013763085130679056,
 0.03403166964708823,
 0.044716866713814724,
 0.012388898216240448,
 -0.002581090107914815,
 -0.014828093654396909,
 -0.03925331587937342,
 -0.009222989617368595,
 0.051709257068857484,
 0.057709552445519396,
 0.05717743942831176,
 0.04500711063891563,
 -0.05357980979471495,
 0.028786292556083788,
 -0.04717806316399884,
 -0.03904654086178995,
 0.0269334136890929,
 0.009372563018252985,
 -0.06675166908990061,
 0.004938109901870367,
 -0.005874107879731298,
 0.1722405408556482,
 -0.004403435088242628,
 0.018686358172985692,
 0.08337000695703303,
 0.001605537739684346,
 -0.022504985867161712,
 0.02034850622363601,
 -0.018724731854213094,
 0.01533299836447742,
 0.0031955236630070247,
 0.027548082498262304,
 -0.018739514180486694,
 -0.003846668585139685,
 0.011296355453870728,
 0.016178449521048348,
 0.019279552279787997,
 -0.017621894982169144,
 -0.009598179921684678,
 0.009244901854272788,
 -0.0283

In [11]:
# setup (see also ../../local_search.ipynb)
entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)

description_embedding_store = LanceDBVectorStore(
    collection_name="entity_description_embeddings",
)
description_embedding_store.connect(db_uri=LANCEDB_URI)
entity_description_embeddings = store_entity_semantic_embeddings(
    entities=entities, vectorstore=description_embedding_store
)
# covariate_df = pd.read_parquet(f"{INPUT_DIR}/{COVARIATE_TABLE}.parquet")
# claims = read_indexer_covariates(covariate_df)
# covariates = {"claims": claims}
report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)
text_unit_df = pd.read_parquet(f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet")
text_units = read_indexer_text_units(text_unit_df)

# api_key = os.environ["GRAPHRAG_API_KEY"]
# llm_model = os.environ["GRAPHRAG_LLM_MODEL"]
# embedding_model = os.environ["GRAPHRAG_EMBEDDING_MODEL"]

llm = ChatOpenAI(
    api_key="ollama",
    model="llama3.1",
    api_base="http://localhost:11434/v1",
    max_retries=20,
)

token_encoder = tiktoken.get_encoding("cl100k_base")

text_embedder = OpenAICompatibleOllamaEmbedding(
    api_key="olama",
    api_base="http://localhost:11434",
    api_type=OpenaiApiType.OpenAI,
    model="nomic-embed-text",
    deployment_name="nomic-embed-text",
    max_retries=20,
)

context_builder = LocalSearchMixedContext(
    community_reports=reports,
    text_units=text_units,
    entities=entities,
    relationships=relationships,
    covariates=None,
    entity_text_embeddings=description_embedding_store,
    embedding_vectorstore_key=EntityVectorStoreKey.ID,  # if the vectorstore uses entity title as ids, set this to EntityVectorStoreKey.TITLE
    text_embedder=text_embedder,
    token_encoder=token_encoder,
)

local_context_params = {
    "text_unit_prop": 0.5,
    "community_prop": 0.1,
    "conversation_history_max_turns": 5,
    "conversation_history_user_turns_only": True,
    "top_k_mapped_entities": 1,
    "top_k_relationships": 1,
    "include_entity_rank": True,
    "include_relationship_weight": True,
    "include_community_rank": False,
    "return_candidate_context": False,
    "embedding_vectorstore_key": EntityVectorStoreKey.ID,  # set this to EntityVectorStoreKey.TITLE if the vectorstore uses entity title as ids
    "max_tokens": 12_000,  # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)
}

llm_params = {
    "max_tokens": 2_000,  # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 1000=1500)
    "temperature": 0.0,
}

search_engine = LocalSearch(
    llm=llm,
    context_builder=context_builder,
    token_encoder=token_encoder,
    llm_params=llm_params,
    context_builder_params=local_context_params,
    response_type="multiple paragraphs",  # free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report
)

In [12]:
text_embedder.embed("Hello")

[0.01793399280623597,
 -0.005854579889000267,
 -0.17534418004648694,
 -0.013763085130679056,
 0.03403166964708823,
 0.044716866713814724,
 0.012388898216240448,
 -0.002581090107914815,
 -0.014828093654396909,
 -0.03925331587937342,
 -0.009222989617368595,
 0.051709257068857484,
 0.057709552445519396,
 0.05717743942831176,
 0.04500711063891563,
 -0.05357980979471495,
 0.028786292556083788,
 -0.04717806316399884,
 -0.03904654086178995,
 0.0269334136890929,
 0.009372563018252985,
 -0.06675166908990061,
 0.004938109901870367,
 -0.005874107879731298,
 0.1722405408556482,
 -0.004403435088242628,
 0.018686358172985692,
 0.08337000695703303,
 0.001605537739684346,
 -0.022504985867161712,
 0.02034850622363601,
 -0.018724731854213094,
 0.01533299836447742,
 0.0031955236630070247,
 0.027548082498262304,
 -0.018739514180486694,
 -0.003846668585139685,
 0.011296355453870728,
 0.016178449521048348,
 0.019279552279787997,
 -0.017621894982169144,
 -0.009598179921684678,
 0.009244901854272788,
 -0.0283

## Run local search on sample queries

In [13]:
result =search_engine.search("Who is Zynara Prime?")
print(result.response)

**Zynara Prime: A Key Player in Galactic Politics**

According to the provided data, Zynara Prime is a planet with which Drakalon has rising tensions [Data: Relationships (0, 1); Sources (0)]. This suggests that there are significant political or economic disagreements between the two planets.

Unfortunately, no further information about Zynara Prime's government, economy, or culture is available in the provided data. However, it is clear that Zynara Prime plays a crucial role in the galactic politics of this universe.

**Relationships with Drakalon and Governor Iska Torvath**

The relationships table reveals that there are rising tensions between Zynara Prime and both Drakalon [Data: Relationships (0, 1)] and its governor, Governor Iska Torvath [Data: Relationships (5)]. This suggests that the issues between Zynara Prime and Drakalon may be complex and involve multiple stakeholders.

**Trade Accords and Diplomatic Efforts**

A source mentions that Drakalon refuses to accept the Unifie

In [14]:
print(result)

SearchResult(response="**Zynara Prime: A Key Player in Galactic Politics**\n\nAccording to the provided data, Zynara Prime is a planet with which Drakalon has rising tensions [Data: Relationships (0, 1); Sources (0)]. This suggests that there are significant political or economic disagreements between the two planets.\n\nUnfortunately, no further information about Zynara Prime's government, economy, or culture is available in the provided data. However, it is clear that Zynara Prime plays a crucial role in the galactic politics of this universe.\n\n**Relationships with Drakalon and Governor Iska Torvath**\n\nThe relationships table reveals that there are rising tensions between Zynara Prime and both Drakalon [Data: Relationships (0, 1)] and its governor, Governor Iska Torvath [Data: Relationships (5)]. This suggests that the issues between Zynara Prime and Drakalon may be complex and involve multiple stakeholders.\n\n**Trade Accords and Diplomatic Efforts**\n\nA source mentions that Dr

In [20]:
question = "What is the Crimson Circle?"
result = await search_engine.asearch(question)
print(result.response)

search_prompt 
---Role---

You are a helpful assistant responding to questions about data in the tables provided.


---Goal---

Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.

If you don't know the answer, just say so. Do not make anything up.

Points supported by data should list their data references as follows:

"This is an example sentence supported by multiple data references [Data: <dataset name> (record ids); <dataset name> (record ids)]."

Do not list more than 5 record ids in a single reference. Instead, list the top 5 most relevant record ids and add "+more" to indicate that there are more.

For example:

"Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Sources (15, 16), Reports (1), Entities (5, 7); Relationships (23); Claims (2, 7, 34, 46, 64, 

## Inspecting the context data used to generate the response

In [16]:
result.context_data["entities"].head()

Unnamed: 0,id,entity,description,number of relationships,in_context
0,1,GOVERNOR ISKA TORVATH,"Governor of Drakalon, which has rising tension...",2,True
1,0,DRAKALON,Here is a comprehensive summary of the data pr...,5,True


In [17]:
result.context_data["relationships"].head()

Unnamed: 0,id,source,target,description,weight,rank,links,in_context
0,1,DRAKALON,GOVERNOR ISKA TORVATH,Governor Iska Torvath is the ruler of Drakalon...,4.0,7,1,True
1,0,DRAKALON,ZYNARA PRIME,Drakalon has rising tensions with Zynara Prime...,8.0,10,2,True
2,5,GOVERNOR ISKA TORVATH,ZYNARA PRIME,There are rising tensions between Zynara Prime...,4.0,7,2,True


## Visualizing the result context as graph

In [18]:
"""
Helper function to visualize the result context with `yfiles-jupyter-graphs`.

The dataframes are converted into supported nodes and relationships lists and then passed to yfiles-jupyter-graphs.
Additionally, some values are mapped to visualization properties.
"""


def show_graph(result):
    """Visualize the result context with yfiles-jupyter-graphs."""
    from yfiles_jupyter_graphs import GraphWidget

    if (
        "entities" not in result.context_data
        or "relationships" not in result.context_data
    ):
        msg = "The passed results do not contain 'entities' or 'relationships'"
        raise ValueError(msg)

    # converts the entities dataframe to a list of dicts for yfiles-jupyter-graphs
    def convert_entities_to_dicts(df):
        """Convert the entities dataframe to a list of dicts for yfiles-jupyter-graphs."""
        nodes_dict = {}
        for _, row in df.iterrows():
            # Create a dictionary for each row and collect unique nodes
            node_id = row["entity"]
            if node_id not in nodes_dict:
                nodes_dict[node_id] = {
                    "id": node_id,
                    "properties": row.to_dict(),
                }
        return list(nodes_dict.values())

    # converts the relationships dataframe to a list of dicts for yfiles-jupyter-graphs
    def convert_relationships_to_dicts(df):
        """Convert the relationships dataframe to a list of dicts for yfiles-jupyter-graphs."""
        relationships = []
        for _, row in df.iterrows():
            # Create a dictionary for each row
            relationships.append({
                "start": row["source"],
                "end": row["target"],
                "properties": row.to_dict(),
            })
        return relationships

    w = GraphWidget()
    # use the converted data to visualize the graph
    w.nodes = convert_entities_to_dicts(result.context_data["entities"])
    w.edges = convert_relationships_to_dicts(result.context_data["relationships"])
    w.directed = True
    # show title on the node
    w.node_label_mapping = "entity"
    # use weight for edge thickness
    w.edge_thickness_factor_mapping = "weight"
    display(w)


show_graph(result)

GraphWidget(layout=Layout(height='500px', width='100%'))