# Knowledge Graph Builder Example

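This notebook demonstrates how to build a knowledge graph from DataFrames using the functions defined in `kg_builder.py`. It parses a sample XML file into DataFrames, uses an LLM to describe the tables and extract entities and relationships, and then loads the result into Neo4j. The LLM settings are read from `../config.yaml` (section `google_ai_studio`, key `api_key`, optional `model`).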

In [1]:
import sys
import os
import pandas as pd
from kg_builder import describe_tables, extract_entities_and_relations, build_cypher_commands, GoogleAIStudioLLM, execute_cypher, build_cypher_commands_with_embeddings, execute_cypher_with_params, create_vector_indexes
# Make the sibling `parser` directory importable so `parse_xml` can be loaded.
import pathlib
notebook_dir = pathlib.Path().resolve()  # the kernel's current working directory
parser_dir = notebook_dir / '../parser'
# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
_parser_path = str(parser_dir.resolve())
if _parser_path not in sys.path:
    sys.path.append(_parser_path)

# NOTE(review): `parser` was also the name of a standard-library module before
# Python 3.10 — confirm the kernel resolves this import to the project module.
from parser import parse_xml

# Parse the sample XML file (relative path; adjust if the kernel is started elsewhere).
xml_file_path = '../parser/data/RAN_CM_DATA_SAMPLES.xml'  # Adjust path as needed
# parse_xml returns three values; `dfs` is what the following cells pass to
# describe_tables.
dfs, metadata, metadata2 = parse_xml(xml_file_path)

  from .autonotebook import tqdm as notebook_tqdm
2025-07-09 12:09:02.628967: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752062946.656521     786 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752062947.807495     786 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1752062956.707498     786 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1752062956.707533     786 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1752062956.707537     786

In [2]:
# Step 1: Describe tables
import yaml
import os

# LLM settings live in a YAML file one level above the notebook directory.
CONFIG_PATH = os.path.join(notebook_dir, "../config.yaml")
# Decode explicitly as UTF-8 so that parsing the config does not depend on the
# platform's default locale encoding.
with open(CONFIG_PATH, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)
# `api_key` is required (a missing key raises KeyError); `model` is optional
# and falls back to the default given below.
GOOGLE_API_KEY = config["google_ai_studio"]["api_key"]
GOOGLE_MODEL = config["google_ai_studio"].get("model", "gemini-1.5-flash-latest")

# Build the LLM client once; later cells reuse `llm` and `descriptions`.
llm = GoogleAIStudioLLM(api_key=GOOGLE_API_KEY, model=GOOGLE_MODEL)
descriptions = describe_tables(dfs, llm)

In [3]:
# Step 2: Extract entities & relationships
# Passes the table descriptions from Step 1 and the same LLM client to the
# extractor. The call's two return values are unpacked into `entities` and
# `relationships`, which the loading cell hands to load_dataframes_to_neo4j.
entities, relationships = extract_entities_and_relations(descriptions, llm)

In [4]:
# Step 3: Robust DataFrame-to-Neo4j loading (entities, relationships, embeddings)
from kg_builder import load_dataframes_to_neo4j

# Per the original note, this loads all DataFrames as nodes, creates
# relationships, and adds embeddings/descriptions.
df_dict = dict(dfs)  # Ensure dfs is a dict of DataFrames

# `entities` and `relationships` were already produced in Step 2 from the
# Step 1 descriptions. They are reused here rather than calling
# describe_tables / extract_entities_and_relations a second time, which would
# repeat the LLM requests and overwrite the earlier results.
# NOTE(review): assumes describe_tables treats `dfs` and `dict(dfs)` the same — confirm.
load_dataframes_to_neo4j(df_dict, entities, relationships, llm)

