In [1]:
# !pip install graphiti-core langchain-openai langgraph ipywidgets PyMuPDF qdrant-client

Step 1: Environment Setup

In [2]:
import sys,os
# Locate this file (falling back to '.' when `__file__` is not defined in the
# current namespace) and take the parent of its directory as the project root.
__script_path = os.path.abspath(globals().get('__file__', '.'))
__script_dir = os.path.dirname(__script_path)
root_dir = os.path.abspath(os.path.join(__script_dir, '..'))
print(root_dir)

# Put every library root at the very front of sys.path, dropping an earlier
# entry first so the path is moved rather than duplicated.
for lib in reversed([root_dir]):
    try:
        sys.path.remove(lib)
    except ValueError:
        pass  # not on sys.path yet; nothing to move
    sys.path.insert(0, lib)
from configs.config import *


c:\Users\Admin\Data\WDM-AI-TEMIS
c:/Users/Admin/Data/WDM-AI-TEMIS
root_dir: c:/Users/Admin/Data/WDM-AI-TEMIS
c:/Users/Admin/Data/WDM-AI-TEMIS/data


In [3]:
import asyncio
import json
import os
import sys
from pathlib import Path
from typing import Annotated
import fitz  # PyMuPDF
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, PointStruct
from langchain_openai import OpenAIEmbeddings
from graphiti_core import Graphiti
from graphiti_core.edges import EntityEdge
from graphiti_core.nodes import EpisodeType
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())


True

In [4]:
# Connection settings are read from the process environment (load_dotenv() was
# called in the imports cell). Each value is None when its variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
NEO4J_URI = os.environ.get("NEO4J_URI")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
QDRANT_URL = os.environ.get("QDRANT_URL")

Step 2: Verify Neo4j and Qdrant Connections

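Ensure Neo4j is running and reachable at `NEO4J_URI`. Qdrant is opened in embedded local mode (on-disk storage under `exps_dir`), so no separate Qdrant server is needed for this notebook.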

In [5]:
from neo4j import GraphDatabase
from qdrant_client import QdrantClient

# Neo4j: open a driver and round-trip a trivial query to prove the server
# answers with the configured credentials.
neo4j_driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
with neo4j_driver.session() as session:
    result = session.run("RETURN 1")
    assert result.single()[0] == 1, "Neo4j connection failed"
print("Neo4j connected successfully")

# Qdrant: `path=` selects on-disk local storage instead of a server URL.
# NOTE(review): `exps_dir` is not defined in this cell; presumably it comes
# from the `configs.config` star import. Confirm.
qdrant_client = QdrantClient(path=f"{exps_dir}/qdrant_client_memory")
qdrant_client.get_collections()  # raises if the storage cannot be opened
print("Qdrant connected successfully")

Neo4j connected successfully
Qdrant connected successfully


In [None]:
from qdrant_client.models import VectorParams, Distance

# Create the collection only when it is missing. `qdrant_client` uses on-disk
# storage, so the collection survives kernel restarts and an unconditional
# create_collection() raises "already exists" on every re-run of this cell.
if not qdrant_client.collection_exists('graphrag_docs'):
    qdrant_client.create_collection(
        collection_name='graphrag_docs',
        vectors_config=VectorParams(size=768, distance=Distance.COSINE),
    )

Step 3: Data Ingestion

Load Sample Data

Use Wikipedia articles about "Vietnam" as the dataset. Replace with your own data as needed.

In [None]:
from langchain_community.document_loaders import WikipediaLoader
from langchain.text_splitter import TokenTextSplitter

# Load Wikipedia data
# Only the first MAX_WIKI_DOCS pages are used, so cap the loader at that count
# instead of downloading its (larger) default number of pages and discarding
# the rest.
MAX_WIKI_DOCS = 3
raw_documents = WikipediaLoader(query="Vietnam", load_max_docs=MAX_WIKI_DOCS).load()

# Split pages into token-bounded chunks with a small overlap between
# neighbouring chunks.
text_splitter = TokenTextSplitter(chunk_size=512, chunk_overlap=24)
documents = text_splitter.split_documents(raw_documents[:MAX_WIKI_DOCS])

# Fail here with a clear message rather than later inside ingestion.
assert documents, "WikipediaLoader returned no content for query 'Vietnam'"
print(f"Loaded {len(documents)} document chunks")

Create Knowledge Graph with Graphiti

Use Graphiti to extract entities and relationships and store them in Neo4j. Graphiti simplifies graph construction with episodic and relational data management.

In [None]:
from graphiti_core import Graphiti
from graphiti_core.nodes import Episode
from langchain_openai import ChatOpenAI

# Initialize LLM
llm = ChatOpenAI(temperature=0, model_name="gpt-4o", openai_api_key=os.getenv("OPENAI_API_KEY"))

# Initialize Graphiti
graphiti = Graphiti(
    uri=os.getenv("NEO4J_URI"),
    username=os.getenv("NEO4J_USERNAME"),
    password=os.getenv("NEO4J_PASSWORD"),
    llm=llm
)

# Ingest documents as episodes
for idx, doc in enumerate(documents):
    episode = Episode(
        name=f"Vietnam_Doc_{idx}",
        content=doc.page_content,
        source="Wikipedia",
        metadata=doc.metadata
    )
    graphiti.add_episode(episode)
print("Knowledge graph created with Graphiti in Neo4j")

In [None]:
from langchain_qdrant import QdrantVectorStore
from langchain_huggingface import HuggingFaceEmbeddings
from qdrant_client.http.models import Distance, VectorParams

# Initialize embeddings
embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# (Re)create Qdrant collection
# The client persists to disk, so a previous run may have left this collection
# behind. Drop it first: create_collection() would otherwise raise, and
# re-adding the same documents would duplicate every point.
collection_name = "vietnam"
if qdrant_client.collection_exists(collection_name):
    qdrant_client.delete_collection(collection_name)
qdrant_client.create_collection(
    collection_name=collection_name,
    # `size` must equal the embedder's output dimension.
    vectors_config=VectorParams(size=384, distance=Distance.COSINE)
)

# Initialize Qdrant vector store
qdrant = QdrantVectorStore(
    client=qdrant_client,
    collection_name=collection_name,
    embedding=embedder
)

# Add documents to Qdrant
qdrant.add_documents(documents)
print(f"Stored {len(documents)} document embeddings in Qdrant")

Step 4: Parallel Hybrid Retrieval

Implement Parallel Retrieval

Create a custom retriever that runs Qdrant vector search and Graphiti-based Neo4j graph search in parallel using Python’s asyncio.

In [None]:
import asyncio
from langchain_core.documents import Document
from neo4j_graphrag.retrievers import QdrantNeo4jRetriever

# Custom parallel retriever
class ParallelHybridRetriever:
    """Run Qdrant vector search and Graphiti graph search concurrently and merge the hits.

    Every hit is a dict ``{"text": str, "score": float}``.

    Args:
        qdrant_client: Qdrant client holding the chunk embeddings.
        neo4j_driver: Neo4j driver; stored for callers, not used by the searches
            (Graphiti manages its own Neo4j connection).
        collection_name: Qdrant collection to query.
        embedder: Object exposing ``embed_query(text) -> list[float]``.
        graphiti: Graphiti instance used for graph search.
    """

    def __init__(self, qdrant_client, neo4j_driver, collection_name, embedder, graphiti):
        self.qdrant_client = qdrant_client
        self.neo4j_driver = neo4j_driver
        self.collection_name = collection_name
        self.embedder = embedder
        self.graphiti = graphiti

    def _qdrant_search_sync(self, query, top_k):
        """Blocking vector search: embed ``query`` and fetch the ``top_k`` nearest points."""
        query_vector = self.embedder.embed_query(query)
        response = self.qdrant_client.query_points(
            collection_name=self.collection_name,
            query=query_vector,
            limit=top_k,
            with_payload=True,
        )
        hits = []
        for point in response.points:
            payload = point.payload or {}
            # LangChain's QdrantVectorStore writes the chunk text under the
            # "page_content" payload key (its default content key).
            hits.append({"text": payload.get("page_content") or "", "score": float(point.score)})
        return hits

    async def qdrant_search(self, query, top_k):
        """Vector search, offloaded to a worker thread.

        Embedding and the Qdrant call are synchronous; running them directly in
        a coroutine would block the event loop and serialize the two searches.
        """
        return await asyncio.to_thread(self._qdrant_search_sync, query, top_k)

    async def graphiti_search(self, query, top_k):
        """Graph search through Graphiti; returns the matched edge facts as hits."""
        edges = await self.graphiti.search(query=query, num_results=top_k)
        # Graphiti returns edges ordered by relevance without a numeric score,
        # so a reciprocal-rank value (1, 1/2, 1/3, ...) stands in for one.
        return [
            {"text": getattr(edge, "fact", "") or "", "score": 1.0 / rank}
            for rank, edge in enumerate(edges, start=1)
        ]

    async def search(self, query, top_k=2):
        """Return up to ``top_k`` de-duplicated hits drawn from both sources.

        The two ranked lists are interleaved (best vector hit, best graph hit,
        second vector hit, ...) before truncation. Concatenating and then
        slicing would return vector hits only whenever Qdrant yields ``top_k``
        results, and the two score scales are not comparable for sorting.
        """
        if top_k <= 0:
            return []

        # Run Qdrant and Graphiti searches in parallel
        qdrant_results, graphiti_results = await asyncio.gather(
            self.qdrant_search(query, top_k),
            self.graphiti_search(query, top_k),
        )

        sources = (qdrant_results, graphiti_results)
        merged_results = []
        seen_texts = set()
        for rank in range(max(len(hits) for hits in sources)):
            for hits in sources:
                if rank >= len(hits):
                    continue
                hit = hits[rank]
                if hit["text"] in seen_texts:
                    continue  # same text already taken from the other source
                seen_texts.add(hit["text"])
                merged_results.append(hit)
        return merged_results[:top_k]

# Initialize retriever
# Wires together objects created in earlier cells: the Qdrant client and Neo4j
# driver from the connection check, the collection name and embedder from the
# vector-store cell, and the Graphiti instance from the ingestion cell.
retriever = ParallelHybridRetriever(
    qdrant_client=qdrant_client,
    neo4j_driver=neo4j_driver,
    collection_name=collection_name,
    embedder=embedder,
    graphiti=graphiti
)

# Test parallel retrieval
async def test_retrieval():
    query = "What is the history of Vietnam's independence?"
    results = await retriever.search(query, top_k=2)
    for res in results:
        print(f"Text: {res['text'][:100]}..., Score: {res['score']}")

# Run test
await test_retrieval()

Step 5: Set Up GraphQL API

Define GraphQL Schema

Create a GraphQL schema to query the hybrid retriever.

In [None]:
from ariadne import QueryType, gql, make_executable_schema
from ariadne.asgi import GraphQL

# Define GraphQL schema
# A single `search` query takes the question text and the number of hits to
# return, and yields a non-null list of non-null Result objects. Both Result
# fields are non-null, so the resolver must always supply a string `text` and a
# numeric `score`. gql() validates the SDL string when this cell runs.
type_defs = gql("""
    type Query {
        search(query: String!, top_k: Int!): [Result!]!
    }

    type Result {
        text: String!
        score: Float!
    }
""")

# Resolver registry for the root Query type
query = QueryType()

@query.field("search")
async def resolve_search(_, info, query, top_k):
    """Resolve `Query.search` by delegating to the hybrid retriever.

    `query` and `top_k` are the GraphQL arguments; the parent value and `info`
    are not used. Returns one {"text", "score"} dict per hit.
    """
    hits = await retriever.search(query, top_k)
    payload = []
    for hit in hits:
        # Copy only the two fields the schema exposes.
        payload.append({"text": hit["text"], "score": hit["score"]})
    return payload

# Bind the resolvers to the type definitions
schema = make_executable_schema(type_defs, query)

In [None]:
from fastapi import FastAPI
from ariadne.asgi.handlers import GraphQLHTTPHandler

# ASGI application that serves the executable schema at /graphql.
app = FastAPI()
# NOTE(review): debug=True is passed to the GraphQL app; presumably this should
# be turned off outside local development. Confirm.
app.mount("/graphql", GraphQL(schema, debug=True))

# Run the server (execute in terminal: uvicorn script:app --host 0.0.0.0 --port 8000)
# `script` is a placeholder for the importable module that defines `app`; this
# notebook has to be exported to a .py module before uvicorn can import it.

query {
  search(query: "What is the history of Vietnam's independence?", top_k: 2) {
    text
    score
  }
}

In [None]:
# Clear Neo4j database
# WARNING: this removes EVERY node and relationship in the connected database,
# not only the data written by this notebook.
with neo4j_driver.session() as session:
    # consume() waits for the statement to finish and raises on failure, so
    # the success message below is only printed after the delete has run.
    session.run("MATCH (n) DETACH DELETE n").consume()
print("Neo4j database cleared")

# Delete the Qdrant collections created by this notebook. 'graphrag_docs' is
# created in an earlier cell and would otherwise be left on disk, breaking the
# next run of that cell.
for stale_collection in (collection_name, 'graphrag_docs'):
    qdrant_client.delete_collection(stale_collection)
    print(f"Qdrant collection {stale_collection} deleted")