In [1]:
from langchain_core.runnables import  RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain_core.output_parsers import StrOutputParser
from langchain_community.graphs import Neo4jGraph
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_groq import ChatGroq
from langchain_experimental.graph_transformers import LLMGraphTransformer
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget
from langchain_community.vectorstores import Neo4jVector
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
import os
from neo4j import  Driver
from langchain_core.documents import Document

In [2]:
BOLT_URL =  ""
USERNAME =  ""
PASSWORD =  ""
DATABASE =  ""

In [3]:
import os
from neo4j import GraphDatabase

# Create a Neo4j connection class
class Neo4jConnection:
    def __init__(self, uri, user, password):
        try:
            self._driver = GraphDatabase.driver(uri, auth=(user, password))
            print("Connected to Neo4j successfully!")
        except Exception as e:
            raise RuntimeError(f"Failed to create the driver: {e}")
    
    def close(self):
        if self._driver:
            self._driver.close()
    
    def query(self, query, parameters=None):
        try:
            with self._driver.session(database=DATABASE) as session:
                result = session.run(query, parameters)
                return [record for record in result]
        except Exception as e:
            raise RuntimeError(f"Query failed: {e}")

# Initialize connection
try:
    conn = Neo4jConnection(BOLT_URL, USERNAME, PASSWORD)
    # Example query to test the connection
    test_query = "MATCH (n) RETURN n LIMIT 5"
    result = conn.query(test_query)
    print("Query result:", result)
finally:
    conn.close()

Connected to Neo4j successfully!
Query result: []


In [8]:
groq_api_key=""

In [5]:
graph=Neo4jGraph(
    url=BOLT_URL,
    username=USERNAME,
    password=PASSWORD,
    database=DATABASE
)

# Optional to Clear the graph database
cypher = """
    MATCH (n)
    DETACH DELETE n;
"""
graph.query(cypher)

[]

In [7]:
loader = TextLoader(file_path="dummy_data.txt")
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=24)
documents = text_splitter.split_documents(documents=docs)
print(len(documents))

17


In [9]:
llm=ChatGroq(groq_api_key=groq_api_key,model_name="llama-3.1-8b-instant")
llm_transformer = LLMGraphTransformer(llm=llm)
graph_documents = llm_transformer.convert_to_graph_documents(documents)

In [10]:
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,#additional __entity__ will be created 
    include_source=True #this will create mentions relationship to determine the source document
)

In [11]:
try:
    cypher = """
    CREATE FULLTEXT INDEX `fulltext_entity_id` FOR (n:__Entity__) ON EACH [n.id]
    """
    graph.query(cypher)
except:
    pass

In [12]:
embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    model_kwargs={"device": 'cpu'},
    encode_kwargs={"normalize_embeddings": True}
)

vector_index = Neo4jVector.from_existing_graph(
    embeddings,
    url=BOLT_URL,
    username=USERNAME,
    password=PASSWORD,
    database = DATABASE,
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)
vector_retriever = vector_index.as_retriever()

In [14]:
#extract entities from given input
class Entities(BaseModel):
    """Identifying information about entities."""

    names: list[str] = Field(
        ...,
        description="All the person, organization, or business entities that "
        "appear in the text",
    )

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are extracting organization and person entities from the text.",
        ),
        (
            "human",
            "Use the given format to extract information from the following "
            "input: {question}",
        ),
    ]
)


entity_chain = llm.with_structured_output(Entities)
entity_chain.invoke("Who is Nonna Lucia?")

Entities(names=['Nonna Lucia'])

In [15]:
# Fulltext index query
def graph_retriever(question: str) -> str:
    """
Note: we made sure that we turned on the baseEntityLabel while adding the documents to the graph then only this fulltext query will work
Step1: get the query and extract the entities
Step2: for each entity find the nodes using fulltext_entity_id
Step3: for each node get all the incoming and outgoing relationships and finally limit the output to 50. Note because we added the source documents we can exclude the Mentions relationship
    """
    result = ""
    entities = entity_chain.invoke(question)
    for entity in entities.names:
        response = graph.query(
            """ CALL db.index.fulltext.queryNodes('fulltext_entity_id', $query, {limit:2})
                YIELD node, score
                WITH node
                MATCH (node)-[r:!MENTIONS]->(neighbor)
                RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
                UNION 
                MATCH (node)<-[r:!MENTIONS]-(neighbor)
                RETURN neighbor.id + ' - ' + type(r) + ' -> ' + node.id AS output
                LIMIT 50
            """,
            {"query": entity},
        )
        result += "\n".join([el['output'] for el in response])
    return result

In [16]:
print(graph_retriever("Who is Nonna Lucia?"))

Nonna Lucia - ANCESTOR -> Pietro
Lucia - OWNER -> Kitchen
Lucia - OWNER -> Bella Vita
Lucia - SISTER -> Antonio
Lucia - GRANDMOTHER -> Amico
Lucia - MATRIARCH -> Caruso
Lucia - MEMBER -> Caruso Family
Lucia - LIVES_IN -> Los Angeles
Lucia - TEACHER -> Sustainable Cooking Practices
Caruso Family - OWNED -> Amico
Caruso Family - MEMBER -> Antonio
Caruso Family - MEMBER -> Pietro
Caruso Family - MEMBER -> Sofia
Caruso Family - MEMBER -> Lucia
Caruso Family - BASED_IN -> Rome
Caruso Family - BASED_IN -> Los Angeles
Caruso Family - HOSTED -> Culinary Workshops
Caruso Family - SUPPORTED -> Mentorship Programs
Caruso Family - SUPPORTED -> Local Artists
Caruso Family - INVOLVED_IN -> Community
Caruso Family - COLLABORATED -> Local Musicians
Amico - CHILD -> Pietro
Amico - CHILD -> Sofia
Amico - STUDENT -> Lucia
Amico - OWNED -> Amico'S
Amico - HOSTED -> Local Artists
Amico - INITIATOR -> Farm-To-Table
Antonio - PARENT -> Maria
Antonio - PARENT -> Giovanni
Antonio - INHERITOR -> Antonio
Antonio

In [18]:
#here we combine the data from graph retriever function and vector index
def full_retriever(question: str):
    graph_data = graph_retriever(question)
    vector_data = [chunk.page_content for chunk in vector_retriever.invoke(question)]
    final_data = f"""Graph data:
{graph_data}
vector data:
{"#Document ". join(vector_data)}
    """
    return final_data

final_context = full_retriever("Who is Nonna Lucia?")
print(final_context)



Graph data:
Nonna Lucia - ANCESTOR -> Pietro
Lucia - OWNER -> Kitchen
Lucia - OWNER -> Bella Vita
Lucia - SISTER -> Antonio
Lucia - GRANDMOTHER -> Amico
Lucia - MATRIARCH -> Caruso
Lucia - MEMBER -> Caruso Family
Lucia - LIVES_IN -> Los Angeles
Lucia - TEACHER -> Sustainable Cooking Practices
Caruso Family - OWNED -> Amico
Caruso Family - MEMBER -> Antonio
Caruso Family - MEMBER -> Pietro
Caruso Family - MEMBER -> Sofia
Caruso Family - MEMBER -> Lucia
Caruso Family - BASED_IN -> Rome
Caruso Family - BASED_IN -> Los Angeles
Caruso Family - HOSTED -> Culinary Workshops
Caruso Family - SUPPORTED -> Mentorship Programs
Caruso Family - SUPPORTED -> Local Artists
Caruso Family - INVOLVED_IN -> Community
Caruso Family - COLLABORATED -> Local Musicians
Amico - CHILD -> Pietro
Amico - CHILD -> Sofia
Amico - STUDENT -> Lucia
Amico - OWNED -> Amico'S
Amico - HOSTED -> Local Artists
Amico - INITIATOR -> Farm-To-Table
Antonio - PARENT -> Maria
Antonio - PARENT -> Giovanni
Antonio - INHERITOR -> Ant

In [22]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
Use natural language and be concise.
Answer:"""
prompt = ChatPromptTemplate.from_template(template)

chain = (
        {
            "context": full_retriever,
            "question": RunnablePassthrough(),
        }
    | prompt
    | llm
    | StrOutputParser()
)

res = chain.invoke("Who is Nonna Lucia? Give brief information about her")



In [21]:
print(res)

Nonna Lucia is the matriarch of the Caruso family. She is a culinary sage, known for her traditional Sicilian recipes and her role as a mentor to her grandchildren, teaching them the art of Sicilian cooking. She is also described as a pillar of strength and wisdom, guiding her family with love and a firm hand.
