In [None]:
# https://neo4j.com/docs/neo4j-graphrag-python/current/user_guide_rag.html#using-another-llm-model
# https://neo4j.com/blog/news/graphrag-python-package/
# https://neo4j.com/blog/developer/enhancing-hybrid-retrieval-graphrag-python-package/

In [35]:
from neo4j import GraphDatabase
from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline
from neo4j_graphrag.llm import OllamaLLM
from neo4j_graphrag.embeddings.ollama import OllamaEmbeddings
from neo4j_graphrag.retrievers import VectorRetriever, HybridCypherRetriever
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate

In [33]:
# Initialize the ChatOllama model with the specified model name
# model_name = 'qwen3-vl:4b'
chat_name = 'llama3.2:3b'  # Or another text-focused model
cypher_name = 'tomasonjo/llama3-text2cypher-demo:8b_4bit'

# and initialize the ChatOllama instance
chat_model = OllamaLLM(
    model_name=chat_name,
    model_params={
        "response_format": {"type": "json_object"},
        "temperature": 0.7
    }
)

In [15]:
embedder_name = 'qwen3-embedding:0.6b'

embedder = OllamaEmbeddings(
    model=embedder_name
)

In [16]:
import os
from dotenv import load_dotenv
from langchain_neo4j import Neo4jGraph

# Load environment variables from .env file
load_dotenv()

# Get credentials from environment variables
neo4j_url = os.getenv("NEO4J_URL", "bolt://localhost:7687")
neo4j_user = os.getenv("NEO4J_USER", "neo4j")
neo4j_password = os.getenv("NEO4J_PASSWORD")

if not neo4j_password:
    raise ValueError("NEO4J_PASSWORD environment variable is not set. Please create a .env file with your credentials.")

graph = Neo4jGraph(
    url=neo4j_url,
    username=neo4j_user,
    password=neo4j_password
)

driver = GraphDatabase.driver(
    neo4j_url,
    auth=(neo4j_user, neo4j_password)
)


In [17]:
text = '''
The solar system consists of the Sun and the objects that orbit it, including planets, moons, asteroids, comets, and meteoroids.
The Sun is a star at the center of the Solar System.
Mercury is a planet in the Solar System. Mercury orbits the Sun. Mercury has no atmosphere and no magnetic field.
Venus is a planet in the Solar System. Venus orbits the Sun. Venus has a thick atmosphere. The atmosphere of Venus is composed mainly of carbon dioxide. Venus has no magnetic field.
Earth is a planet in the Solar System. Earth orbits the Sun. Earth has one moon called the Moon. Earth has a thick atmosphere composed mainly of nitrogen and oxygen. Earth has a strong magnetic field.
Mars is a planet in the Solar System. Mars orbits the Sun. Mars has two moons called Phobos and Deimos. Mars has a thin atmosphere composed mainly of carbon dioxide. Mars has a weak magnetic field.
Jupiter is a planet in the Solar System. Jupiter orbits the Sun. Jupiter has moons called Io, Europa, Ganymede, and Callisto. Jupiter has a thick atmosphere composed mainly of hydrogen and helium. Jupiter has a strong magnetic field.
'''
print(text)


The solar system consists of the Sun and the objects that orbit it, including planets, moons, asteroids, comets, and meteoroids.
The Sun is a star at the center of the Solar System.
Mercury is a planet in the Solar System. Mercury orbits the Sun. Mercury has no atmosphere and no magnetic field.
Venus is a planet in the Solar System. Venus orbits the Sun. Venus has a thick atmosphere. The atmosphere of Venus is composed mainly of carbon dioxide. Venus has no magnetic field.
Earth is a planet in the Solar System. Earth orbits the Sun. Earth has one moon called the Moon. Earth has a thick atmosphere composed mainly of nitrogen and oxygen. Earth has a strong magnetic field.
Mars is a planet in the Solar System. Mars orbits the Sun. Mars has two moons called Phobos and Deimos. Mars has a thin atmosphere composed mainly of carbon dioxide. Mars has a weak magnetic field.
Jupiter is a planet in the Solar System. Jupiter orbits the Sun. Jupiter has moons called Io, Europa, Ganymede, and Callis

## Build the KG

Build the KG and store in a Neo4j database

In [26]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from neo4j_graphrag.experimental.components.text_splitters.langchain import LangChainTextSplitterAdapter

In [27]:
kg_pipeline = SimpleKGPipeline(
    driver=driver,
    llm=chat_model,
    embedder=embedder,
    from_pdf=False,
    text_splitter=LangChainTextSplitterAdapter(
        RecursiveCharacterTextSplitter(
            chunk_size=200,
            chunk_overlap=20,
            separators=["\n\n", "\n", " ", ""]
        )
    )
)


In [28]:
await kg_pipeline.run_async(
    text=text
)

PipelineResult(run_id='e0755694-04e9-40ab-9889-8ca4cb484633', result={'resolver': {'number_of_nodes_to_resolve': 6, 'number_of_created_nodes': 0}})

In [48]:
from neo4j_graphrag.indexes import create_vector_index, drop_index_if_exists, create_fulltext_index

# First, drop the existing indexes if they exist
drop_index_if_exists(driver, "text_embeddings")
drop_index_if_exists(driver, "chunk_fulltext")

# Get the embedding dimension by testing with a sample query
sample_embedding = embedder.embed_query("test")
dimensions = len(sample_embedding)
print(f"Embedding dimensions: {dimensions}")

# Create vector index
create_vector_index(
    driver,
    name="text_embeddings",
    label="Chunk",
    embedding_property="embedding",
    dimensions=dimensions,
    similarity_fn="cosine")

# Create full-text index for hybrid search
create_fulltext_index(
    driver,
    name="chunk_fulltext",
    label="Chunk",
    node_properties=["text"])

Embedding dimensions: 1024


In [None]:
# https://neo4j.com/docs/neo4j-graphrag-python/current/api.html#retrievers

In [49]:
vector_retriever = VectorRetriever(
   driver,
   index_name="text_embeddings",
   embedder=embedder
)

In [50]:
import json

vector_res = vector_retriever.get_search_results(
    query_text = "Which planet has a thick atmosphere composed mainly of carbon dioxide?",
    top_k=3)
for i in vector_res.records: print("====n" + json.dumps(i.data(), indent=4))

====n{
    "node": {
        "embedding": null,
        "index": 3,
        "text": "Earth is a planet in the Solar System. Earth orbits the Sun. Earth has one moon called the Moon. Earth has a thick atmosphere composed mainly of nitrogen and oxygen. Earth has a strong magnetic"
    },
    "nodeLabels": [
        "__KGBuilder__",
        "Chunk"
    ],
    "elementId": "4:ee307052-61bb-4a26-b230-a256204ad709:0",
    "id": "4:ee307052-61bb-4a26-b230-a256204ad709:0",
    "score": 0.817542552947998
}
====n{
    "node": {
        "embedding": null,
        "index": 5,
        "text": "Mars is a planet in the Solar System. Mars orbits the Sun. Mars has two moons called Phobos and Deimos. Mars has a thin atmosphere composed mainly of carbon dioxide. Mars has a weak magnetic field."
    },
    "nodeLabels": [
        "__KGBuilder__",
        "Chunk"
    ],
    "elementId": "4:ee307052-61bb-4a26-b230-a256204ad709:2",
    "id": "4:ee307052-61bb-4a26-b230-a256204ad709:2",
    "score": 0.79147791

In [51]:
# and initialize the ChatOllama instance
cypher_model = ChatOllama(
    model=cypher_name,
    validate_model_on_init=True,
    temperature=0
)

In [52]:
cypher_prompt = ChatPromptTemplate.from_messages([
    ("system", """
You are an expert Neo4j Cypher query generator.

TASK:
- Translate the user's natural language question into a SINGLE, valid Cypher query.
- Ensure the query is syntactically correct and follows Cypher rules.

CONSTRAINTS:
- Use ONLY the schema provided below.
- Do NOT invent labels, relationship types, or properties.
- Do NOT explain the query.
- Output ONLY valid Cypher (no additional text, comments, or explanations).
- If the question cannot be answered unambiguously using the schema, output:
  // CANNOT_ANSWER
- Combine all conditions into a single WHERE clause.
- Do not use multiple WHERE clauses or WHERE after RETURN.

GRAPH SCHEMA:
Node labels:
- Star {{name}}
- Planet {{name}}
- Moon {{name}}
- Atmosphere {{description}}
- Substance {{name}}
- PhysicalProperty {{name, value}}

Relationships:
- (Planet)-[:ORBITS]->(Star)
- (Moon)-[:ORBITS]->(Planet)
- (Planet)-[:HAS_ATMOSPHERE]->(Atmosphere)
- (Atmosphere)-[:COMPOSED_OF]->(Substance)
- (Planet)-[:HAS_PROPERTY]->(PhysicalProperty)

QUERY RULES:
1. Always specify node labels.
2. Always specify relationship directions.
3. Use meaningful variable names.
4. Return only properties, not full nodes.
5. Use DISTINCT unless duplicates are required.
6. Use OPTIONAL MATCH if information may be missing.
7. Do not use APOC or procedures.
8. Ensure the query has only one WHERE clause, placed after MATCH clauses.

EXAMPLES:
User: Which planets have atmospheres?
Cypher: MATCH (p:Planet)-[:HAS_ATMOSPHERE]->(a:Atmosphere) RETURN p.name AS planet

User: Which planet has an atmosphere composed of carbon dioxide?
Cypher: MATCH (p:Planet)-[:HAS_ATMOSPHERE]->(a:Atmosphere)-[:COMPOSED_OF]->(s:Substance) WHERE s.name = "carbon dioxide" RETURN p.name AS planet

"""),
    ("human", "{question}")
])

In [53]:
retrieval_query = cypher_model.invoke(
    cypher_prompt.format_messages(
        question="Which planet has a thick atmosphere composed mainly of carbon dioxide?"
    )
)
print("Generated Cypher Query:")
print(retrieval_query)

Generated Cypher Query:
content='MATCH (p:Planet)-[:HAS_ATMOSPHERE]->(a:Atmosphere)-[:COMPOSED_OF]->(s:Substance)\nWHERE a.description = "thick" AND s.name = "carbon dioxide"\nRETURN p.name AS planet' additional_kwargs={} response_metadata={'model': 'tomasonjo/llama3-text2cypher-demo:8b_4bit', 'created_at': '2026-01-27T16:26:14.333715423Z', 'done': True, 'done_reason': 'stop', 'total_duration': 4072673364, 'load_duration': 1815487931, 'prompt_eval_count': 459, 'prompt_eval_duration': 668404788, 'eval_count': 54, 'eval_duration': 1507287340, 'logprobs': None, 'model_name': 'tomasonjo/llama3-text2cypher-demo:8b_4bit', 'model_provider': 'ollama'} id='lc_run--019c0046-ddd4-7311-976b-da9f29366f95-0' tool_calls=[] invalid_tool_calls=[] usage_metadata={'input_tokens': 459, 'output_tokens': 54, 'total_tokens': 513}


In [55]:
hybrid_cypher_retriever = HybridCypherRetriever(
    driver=driver,
    vector_index_name="text_embeddings",
    fulltext_index_name="chunk_fulltext",
    retrieval_query=retrieval_query.content,
    embedder=embedder,
)
query_text = "Which planet does not have a moon?"
retriever_result = hybrid_cypher_retriever.search(query_text=query_text, top_k=3)
print(retriever_result)



items=[] metadata={'query_vector': [-0.028384028, 0.002830663, -0.0043387706, -0.03134606, 0.033290997, -0.0013688451, -0.018562037, 0.002676683, -0.07072638, -0.058787465, -0.0036130387, 0.009364588, -0.02390524, -0.0044236607, -0.04485816, 0.05031527, 0.039242033, 0.051991124, 0.11592515, -0.036538105, -0.0143636, 0.036007, -0.000110765846, 0.07708667, 0.030536084, 0.070412874, -0.045901734, 0.0038264745, -0.0099927755, 0.008953666, 0.057627644, 0.042114053, -0.013641782, -0.0087065, 0.0076421043, -0.007515464, 0.009628162, -0.01154903, -0.032000422, 0.004890959, 0.0023061389, 0.06356441, 0.00091594434, 0.020987108, 0.008918945, -0.024860008, 0.07033663, 0.04460988, 0.03813402, 0.017895207, -0.011243846, -0.03640828, 0.0018654999, 0.013746458, 0.035338946, -0.03678158, -0.010648597, -0.019657258, 0.037519097, -0.038234234, -0.056616012, 0.05410408, 0.0050355177, -0.008825484, -0.012356747, 0.11641918, 0.0021764939, 0.015251316, -0.0189909, -0.016788246, -0.0141896885, -0.030882668, 0