# Knowledge Graph Builder

Reference — neo4j-graphrag user guide, "Knowledge Graph Builder": https://neo4j.com/docs/neo4j-graphrag-python/current/user_guide_kg_builder.html



In [3]:
import os

import neo4j

from dotenv import load_dotenv

from neo4j_graphrag.llm import OpenAILLM, MistralAILLM, OllamaLLM
from neo4j_graphrag.embeddings import OpenAIEmbeddings, MistralAIEmbeddings, OllamaEmbeddings
from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline
from neo4j_graphrag.experimental.components.text_splitters.fixed_size_splitter import FixedSizeSplitter

In [4]:
# Load settings from a local .env file (if one is found) into os.environ, so the
# Neo4j connection values read via os.environ below can be kept out of the notebook.
load_dotenv()

True

In [5]:
# Open the Neo4j driver from environment settings (NEO4J_URL, NEO4J_USER,
# NEO4J_PASSWORD).
# The variables are indexed directly rather than read with .get(): a missing
# variable then raises KeyError naming it right here, instead of handing None to
# the driver and surfacing later as a less obvious connection/auth error.
driver = neo4j.GraphDatabase.driver(
    os.environ["NEO4J_URL"],
    auth=(os.environ["NEO4J_USER"], os.environ["NEO4J_PASSWORD"]),
)

In [14]:
# Provider option: a local Ollama server.
# This cell binds `llm` and `embedder`; the other provider cells bind the same
# names, so whichever provider cell was executed last is the one used later.
# NOTE(review): `max_tokens` and `response_format` are OpenAI-style parameter
# names (Ollama's own output-length option is `num_predict`) — confirm that the
# installed OllamaLLM version forwards them in a form Ollama honors.
llm = OllamaLLM(
    model_name="llama3.2:3b",
    model_params=dict(
        max_tokens=2000,
        response_format=dict(type="json_object"),
        temperature=0,
    ),
)
embedder = OllamaEmbeddings(
    model="nomic-embed-text",
)

In [None]:
# Provider option: MistralAI (expects MISTRAL_API_KEY in the environment).
# This cell rebinds `llm` and `embedder`; whichever provider cell was executed
# last is the one later cells pick up.
llm = MistralAILLM(
    model_name="mistral-large-latest",
)
embedder = MistralAIEmbeddings(
    model="mistral-embed",
)

In [6]:
# Provider option: OpenAI (expects OPENAI_API_KEY in the environment).
# This cell rebinds `llm` and `embedder`; whichever provider cell was executed
# last is the one later cells pick up.
llm = OpenAILLM(
    model_name="gpt-4o",
)
embedder = OpenAIEmbeddings(
    model="text-embedding-ada-002",
)

In [15]:
# Minimal configuration: only the driver, LLM and embedder are supplied and the
# input is a PDF; no schema or text splitter is passed.
# NOTE: `pipeline` is assigned again further down with an explicit schema, so on
# a top-to-bottom run this instance is replaced before anything is run with it.
pipeline = SimpleKGPipeline(driver=driver, llm=llm, embedder=embedder, from_pdf=True)

In [7]:
# Schema definitions handed to SimpleKGPipeline (entities / relations /
# potential_schema). An item is either a bare label string or a dict that adds
# a description and/or a list of typed properties to the label.

# Node types: two label-only entries plus a fully described "Company".
ENTITIES = [
    "Person",
    "Country",
    dict(
        label="Company",
        description="A private company",
        properties=[
            dict(name="name", type="STRING"),
            dict(name="industry", type="STRING"),
        ],
    ),
]

# Relationship types: LOCATED_IN carries an optional "city" property.
RELATIONS = [
    "WORKS_FOR",
    dict(
        label="LOCATED_IN",
        properties=[
            dict(
                name="city",
                description="Extract the city if available",
                type="STRING",
            ),
        ],
    ),
]

# Triples built from the labels above; each reads as
# (start node label, relationship label, end node label).
POTENTIAL_SCHEMA = [
    ("Person", "WORKS_FOR", "Company"),
    ("Person", "LOCATED_IN", "Country"),
    ("Company", "LOCATED_IN", "Country"),
]

In [8]:
# Pipeline configured with an explicit schema (ENTITIES, RELATIONS,
# POTENTIAL_SCHEMA) and a custom fixed-size text splitter; input is a PDF.
pipeline = SimpleKGPipeline(
    llm=llm,
    embedder=embedder,
    driver=driver,
    entities=ENTITIES,
    relations=RELATIONS,
    potential_schema=POTENTIAL_SCHEMA,
    # Smaller chunk size than one would normally pick: for demo purposes only.
    text_splitter=FixedSizeSplitter(chunk_size=400, chunk_overlap=10),
    from_pdf=True,
)

In [9]:
# Run the pipeline on document.pdf (a relative path, resolved against the
# kernel's working directory). Top-level `await` is supported in IPython cells;
# leaving the call as the cell's last expression displays its result.
await pipeline.run_async(file_path="document.pdf")

PipelineResult(run_id='4a82a60c-1d88-4fbe-a788-5a0d06d6caab', result={'resolver': {'number_of_nodes_to_resolve': 8, 'number_of_created_nodes': 6}})

In [27]:
from neo4j_graphrag.experimental.pipeline.config.runner import PipelineRunner

In [29]:
pipeline = PipelineRunner.from_config_file("config_kg_pipeline.json")
await pipeline.run({"file_path":"document.pdf"})

PipelineResult(run_id='dbbca3ec-2c59-43aa-99ed-962c9050e594', result={'resolver': {'number_of_nodes_to_resolve': 11, 'number_of_created_nodes': 6}})