In [90]:
from langchain_core.runnables import  RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import StrOutputParser
import os
from langchain_community.graphs import Neo4jGraph
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain_community.chat_models import ChatOllama
from langchain_experimental.graph_transformers import LLMGraphTransformer
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars

from dotenv import load_dotenv

load_dotenv()

True

In [91]:
graph = Neo4jGraph()


In [15]:
loader = TextLoader(file_path="dummytext.txt")
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=24)
documents = text_splitter.split_documents(documents=docs)

In [28]:
llm_type = os.getenv("LLM_TYPE", "openai")
if llm_type == "ollama":
    llm = ChatOllama(model="llama3.1", temperature=0)
else:
    llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")

llm_transformer = LLMGraphTransformer(llm=llm)

graph_documents = llm_transformer.convert_to_graph_documents(documents)


In [92]:
graph_documents[0]

GraphDocument(nodes=[Node(id='Amico’S Family', type='Family'), Node(id='Love', type='Concept'), Node(id='Tradition', type='Concept')], relationships=[Relationship(source=Node(id='Amico’S Family', type='Family'), target=Node(id='Love', type='Concept'), type='HAS'), Relationship(source=Node(id='Amico’S Family', type='Family'), target=Node(id='Tradition', type='Concept'), type='HAS')], source=Document(metadata={'source': 'dummytext.txt', 'id': 'ed648f5744c61a703b28736f1a7cf0c0'}, page_content='1. The Story of Amico’s Family: A Legacy of Love and Tradition'))

In [32]:
graph.add_graph_documents(graph_documents,baseEntityLabel=True,include_source=True)

In [93]:
def showGraph():
    driver = GraphDatabase.driver(
        uri=os.getenv("NEO4J_URI"),
        auth=(os.getenv("NEO4J_USERNAME"), os.getenv("NEO4J_PASSWORD"))
    )
    session = driver.session()
    widget = GraphWidget(graph = session.run("MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t").graph())
    widget.node_label_mapping = 'id'
    return widget

showGraph()

GraphWidget(layout=Layout(height='800px', width='100%'))

In [94]:
vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)

vector_retriever = vector_index.as_retriever()

In [96]:
class Entities(BaseModel):
    """Identifying information about entities"""
    names: list[str] = Field(..., description="All the person, organization, or business entities that appear in the text")

prompt = ChatPromptTemplate.from_messages(
[
        (
            "system",
            "You are extracting organization and person entities from the text.",
        ),
        (
            "human",
            "Use the given format to extract information from the following "
            "input: {question}",
        ),
    ]
)

entity_chain = prompt | llm.with_structured_output(Entities)

In [97]:
entity_chain.invoke({"question": "Who are Nonna Lucia and Giovanni Caruso?"}).names


['Nonna Lucia', 'Giovanni Caruso']

In [100]:
def generate_full_text_query(input: str) -> str:
    words = [el for el in remove_lucene_chars(input).split() if el]
    if not words:
        return ""
    full_text_query = " AND ".join([f"{word}~2" for word in words])
    print(f"Generated Query: {full_text_query}")
    return full_text_query.strip()


# Fulltext index query
def graph_retriever(question: str) -> str:
    """
    Collects the neighborhood of entities mentioned
    in the question using MATCH queries directly.
    """
    result = ""
    entities = entity_chain.invoke({"question": question})
    
    for entity in entities.names:
        print(f"Entity: {entity}")
        response = graph.query(
            """
            MATCH (n:Entity {id: $entity})
            OPTIONAL MATCH (n)-[r:MENTIONS]->(neighbor)
            RETURN n.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
            UNION
            OPTIONAL MATCH (n)<-[r:MENTIONS]-(neighbor)
            RETURN neighbor.id + ' - ' + type(r) + ' -> ' + n.id AS output
            LIMIT 50
            """,
            {"entity": entity}
        )
        result += "\n".join([el['output'] for el in response])
    return result

In [101]:
print(graph_retriever("Who is Nonna Lucia?"))


Entity: Nonna Lucia
a7d2d246c8425014d3dca592ee8b298b - MENTIONS -> innovation
a7d2d246c8425014d3dca592ee8b298b - MENTIONS -> tradition
a7d2d246c8425014d3dca592ee8b298b - MENTIONS -> good food
a7d2d246c8425014d3dca592ee8b298b - MENTIONS -> Caruso family
a7d2d246c8425014d3dca592ee8b298b - MENTIONS -> Tradition
a7d2d246c8425014d3dca592ee8b298b - MENTIONS -> Caruso Family
a7d2d246c8425014d3dca592ee8b298b - MENTIONS -> Food
a7d2d246c8425014d3dca592ee8b298b - MENTIONS -> Innovation
a7d2d246c8425014d3dca592ee8b298b - MENTIONS -> Joy
ed648f5744c61a703b28736f1a7cf0c0 - MENTIONS -> Amico’S Family
ed648f5744c61a703b28736f1a7cf0c0 - MENTIONS -> Love
ed648f5744c61a703b28736f1a7cf0c0 - MENTIONS -> Tradition
da1344f72d2b4be3eec4f2c475251de2 - MENTIONS -> Santa Caterina
da1344f72d2b4be3eec4f2c475251de2 - MENTIONS -> Caruso Family
da1344f72d2b4be3eec4f2c475251de2 - MENTIONS -> Sicily
608aa3cb5e0669d9328a2a6f161d768c - MENTIONS -> Love
608aa3cb5e0669d9328a2a6f161d768c - MENTIONS -> Culinary Heritage
608

In [102]:
def full_retriever(question: str):
    graph_data = graph_retriever(question)
    vector_data = [el.page_content for el in vector_retriever.invoke(question)]
    final_data = f"""Graph data:
{graph_data}
vector data:
{"#Document ". join(vector_data)}
    """
    return final_data

In [104]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
Use natural language and be concise.
Answer:"""
prompt = ChatPromptTemplate.from_template(template)

chain = (
        {
            "context": full_retriever,
            "question": RunnablePassthrough(),
        }
    | prompt
    | llm
    | StrOutputParser()
)

In [107]:
chain.invoke(input="I need a summary of Amico’s Family")


Entity: Amico's Family




'Amico\'s Family is known for its rich culinary legacy rooted in love and tradition. They operate a restaurant called "Amico\'s" in New York City, which offers a modern interpretation of Italian cuisine by blending traditional recipes with contemporary trends. The family is celebrated for creating grand feasts and innovative dishes that combine Sicilian and Tuscan flavors, establishing a signature style that reflects both nostalgia and innovation.'