# End-to-End Example

In [10]:

# Install third-party packages into the running kernel's environment
# (`%pip` targets the kernel's interpreter, unlike `!pip`).
# NOTE(review): no versions are pinned, so a fresh run may resolve different
# releases than the ones shown in the recorded output — consider pinning.
%pip install fsspec langchain-text-splitters openai python-dotenv numpy torch

Collecting torch
  Downloading torch-2.4.1-cp311-none-macosx_11_0_arm64.whl.metadata (26 kB)
Collecting filelock (from torch)
  Downloading filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB)
Collecting sympy (from torch)
  Downloading sympy-1.13.3-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch)
  Using cached networkx-3.3-py3-none-any.whl.metadata (5.1 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy->torch)
  Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading torch-2.4.1-cp311-none-macosx_11_0_arm64.whl (62.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 MB[0m [31m49.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading filelock-3.16.1-py3-none-any.whl (16 kB)
Using cached networkx-3.3-py3-none-any.whl (1.7 MB)
Downloading sympy-1.13.3-py3-none-any.whl (6.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.2/6.2 MB[0m [31m30.3 MB/s[0m eta [36m0:00:00[0m
[?25hUsin

In [11]:
%pip install -U git+https://github.com/neo4j/neo4j-graphrag-python

Collecting git+https://github.com/neo4j/neo4j-graphrag-python
  Cloning https://github.com/neo4j/neo4j-graphrag-python to /private/var/folders/dq/tkr53kq51wd04bk26m5s3_5c0000gn/T/pip-req-build-6h32lpd3
  Running command git clone --filter=blob:none --quiet https://github.com/neo4j/neo4j-graphrag-python /private/var/folders/dq/tkr53kq51wd04bk26m5s3_5c0000gn/T/pip-req-build-6h32lpd3
  Resolved https://github.com/neo4j/neo4j-graphrag-python to commit ff1c6ee677996f804d8f80c669aae52a4e317925
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Note: you may need to restart the kernel to use updated packages.


In [12]:
from dotenv import load_dotenv
import os

# Load the Neo4j connection settings from a local `.env` file.
# `override=True` lets values from the file replace variables that are already
# set in the process environment.
load_dotenv('.env', override=True)
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')

# Fail fast: `os.getenv` returns None for an unset variable, and these values
# are first used when the driver is opened in the final KG-writing cell, i.e.
# after the embedding and LLM-extraction cells have already run.
_missing_settings = [
    name
    for name, value in (
        ("NEO4J_URI", NEO4J_URI),
        ("NEO4J_USERNAME", NEO4J_USERNAME),
        ("NEO4J_PASSWORD", NEO4J_PASSWORD),
    )
    if value is None
]
if _missing_settings:
    raise RuntimeError(
        "Missing Neo4j settings (set them in .env or the environment): "
        + ", ".join(_missing_settings)
    )

## Knowledge Graph (KG) Building

In [13]:
from neo4j_graphrag.experimental.components.pdf_loader import PdfLoader
# Load the source PDF; the `.text` of the result is what gets split into
# chunks in the next cell.
# The path is relative — presumably resolved against the kernel's working
# directory, so the file has to be present there.
# Top-level `await` relies on IPython's async cell support.
loader = PdfLoader()
pdf_doc = await loader.run("pgpm-13-39.pdf")

In [14]:
from langchain_text_splitters import CharacterTextSplitter
from neo4j_graphrag.experimental.components.text_splitters.langchain import LangChainTextSplitterAdapter
splitter = LangChainTextSplitterAdapter(
    CharacterTextSplitter(chunk_size=500, chunk_overlap=100, separator=".")
)
split_text = await splitter.run(text=pdf_doc.text)

Created a chunk of size 563, which is longer than the specified 500
Created a chunk of size 646, which is longer than the specified 500
Created a chunk of size 732, which is longer than the specified 500


In [15]:
# Sanity check: display the text of the first chunk produced by the splitter.
split_text.chunks[0].text

'REVIEW\nT owards Precision Medicine in Systemic Lupus\nErythematosus\nThis article was published in the following Dove Press journal:\nPharmacogenomics and Personalized Medicine\nElliott Lever1\nMarta R Alves2\nDavid A Isenberg1\n1Centre for Rheumatology, Division of\nMedicine, University College Hospital\nLondon, London, UK;2Internal Medicine,\nDepartment of Medicine, Centro\nHospitalar do Porto, Porto, PortugalAbstract: Systemic lupus erythematosus (SLE) is a remarkable condition characterised by\ndiversity amongst its clinical features and immunological abnormalities'

In [24]:
from neo4j_graphrag.experimental.components.types import TextChunks, TextChunk
from neo4j_graphrag.experimental.components.embedder import TextChunkEmbedder
from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings

# Run the chunks through a TextChunkEmbedder backed by OpenAIEmbeddings(),
# which is constructed with no arguments (library defaults).
# NOTE(review): presumably needs an OpenAI API key in the environment — confirm
# it is provided, e.g. through the same .env file.
text_chunk_embedder = TextChunkEmbedder(embedder=OpenAIEmbeddings())
split_text_w_emb = await text_chunk_embedder.run(split_text)

In [27]:
from neo4j_graphrag.experimental.components.schema import (
    SchemaBuilder,
    SchemaEntity,
    SchemaProperty,
    SchemaRelation,
)

# Builder used further down to assemble the extraction schema.
schema_builder = SchemaBuilder()

# Two STRING properties, "name" and "details", reused for every node label.
node_properties = [
    SchemaProperty(name=prop_name, type="STRING")
    for prop_name in ("name", "details")
]

# Candidate node labels, grouped by domain.

# General-purpose labels.
basic_node_labels = [
    "Object", "Entity", "Group", "Person", "Organization", "Place",
]

# Labels for scholarly publications.
academic_node_labels = ["ArticleOrPaper", "PublicationOrJournal"]

# Biomedical labels.
medical_node_labels = [
    "Anatomy", "BiologicalProcess", "Cell", "CellularComponent", "CellType",
    "Condition", "Disease", "Drug", "EffectOrPhenotype", "Exposure",
    "GeneOrProtein", "Molecule", "MolecularFunction", "Pathway",
]

# Full label set as a new list, in group order: basic, academic, medical.
node_labels = [*basic_node_labels, *academic_node_labels, *medical_node_labels]


# Property definition for relationships, and the relationship types on offer.
rel_properties = [
    SchemaProperty(name="relType", type="STRING", description="The type of relationship")
]
basic_rel_types = ["RELATES_TO"]

# Cartesian product: every (source label, relationship type, target label)
# triple, ordered by source, then target, then relationship type.
# A comprehension keeps the iteration variables out of the notebook's global
# namespace; explicit top-level loops would leave them behind, and a loop
# variable named `re` would shadow the standard-library `re` module.
possible_schema = [
    (src_label, rel_type, dst_label)
    for src_label in node_labels
    for dst_label in node_labels
    for rel_type in basic_rel_types
]


In [28]:
schema = await schema_builder.run(
    entities=[SchemaEntity(label=i, properties=node_properties) for i in node_labels],
    relations=[] ,#[SchemaRelation(label=i, properties=rel_properties) for i in basic_rel_types],
    potential_schema=[] #possible_schema
)
schema

SchemaConfig(entities={'Object': {'label': 'Object', 'description': '', 'properties': [{'name': 'name', 'type': 'STRING', 'description': ''}, {'name': 'details', 'type': 'STRING', 'description': ''}]}, 'Entity': {'label': 'Entity', 'description': '', 'properties': [{'name': 'name', 'type': 'STRING', 'description': ''}, {'name': 'details', 'type': 'STRING', 'description': ''}]}, 'Group': {'label': 'Group', 'description': '', 'properties': [{'name': 'name', 'type': 'STRING', 'description': ''}, {'name': 'details', 'type': 'STRING', 'description': ''}]}, 'Person': {'label': 'Person', 'description': '', 'properties': [{'name': 'name', 'type': 'STRING', 'description': ''}, {'name': 'details', 'type': 'STRING', 'description': ''}]}, 'Organization': {'label': 'Organization', 'description': '', 'properties': [{'name': 'name', 'type': 'STRING', 'description': ''}, {'name': 'details', 'type': 'STRING', 'description': ''}]}, 'Place': {'label': 'Place', 'description': '', 'properties': [{'name': '

In [29]:
from neo4j_graphrag.experimental.components.entity_relation_extractor import (
    LLMEntityRelationExtractor, OnError,
)
from neo4j_graphrag.llm import OpenAILLM

# Entity/relation extractor backed by the OpenAI "gpt-4o" model.
extractor = LLMEntityRelationExtractor(
    llm=OpenAILLM(
        model_name="gpt-4o",
        model_params={
            # NOTE(review): a 1000-token cap could cut off the JSON reply for
            # entity-dense chunks — confirm this limit is high enough.
            "max_tokens": 1000,
            # Request a JSON-object response from the model.
            "response_format": {"type": "json_object"},
        }
    ),
    # NOTE(review): presumably chunks whose extraction fails are skipped
    # rather than raising, which would hide partial results — confirm.
    on_error=OnError.IGNORE,
)


In [30]:
# Run the extractor over the embedded chunks with the schema built earlier.
# NOTE(review): presumably issues LLM requests for the chunks, so this cell may
# be slow and incur API cost — confirm before re-running.
res_graph = await extractor.run(chunks=split_text_w_emb, schema=schema)

In [31]:
# Report the size of the extracted graph: node count on the first line,
# relationship count on the second.
print(len(res_graph.nodes), len(res_graph.relationships), sep="\n")

1393
1582


In [32]:
import neo4j
from neo4j_graphrag.experimental.components.kg_writer import Neo4jWriter
from neo4j_graphrag.experimental.components.types import Neo4jGraph

with neo4j.GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD)) as driver:
    writer = Neo4jWriter(driver)
    await writer.run(res_graph)



## Retrieval

In [None]:
# vector retrieval
