In [1]:
from langchain_core.runnables import  RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import StrOutputParser
import os
from langchain_community.graphs import Neo4jGraph
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain_community.chat_models import ChatOllama
from langchain_experimental.graph_transformers import LLMGraphTransformer
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars

from dotenv import load_dotenv

load_dotenv()

True

In [2]:
graph = Neo4jGraph()

In [3]:
loader = TextLoader(file_path="dummytext.txt")
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=24)
documents = text_splitter.split_documents(documents=docs)

In [10]:
documents.pop(61)

Document(metadata={'source': 'dummytext.txt'}, page_content='contributing their skills and resources to help those in need.')

In [12]:
documents.pop(61)

Document(metadata={'source': 'dummytext.txt'}, page_content='4. Cultural and Culinary Festivals')

In [13]:
documents[61]

Document(metadata={'source': 'dummytext.txt'}, page_content='The Carusos believed in celebrating diversity through food and culture. They organized annual cultural and culinary festivals at their respective restaurants, showcasing traditional Sicilian dishes alongside other international cuisines. These')

In [14]:
llm_type = os.getenv("LLM_TYPE", "ollama")
if llm_type == "ollama":
    llm = ChatOllama(model="llama3.1", temperature=0)
else:
    llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
llm_transformer = LLMGraphTransformer(llm=llm)

graph_documents = llm_transformer.convert_to_graph_documents(documents)

In [15]:
i = 0
while i <= len(graph_documents):
    try:
        if i == 3:
            i = i+1
            pass
        graph.add_graph_documents(graph_documents[i:i+1],baseEntityLabel=True,include_source=True)
        i = i+1
        print(i)
    except Exception as e:
        print(e)
        pass


1
2
3
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72


In [16]:
graph_documents

[GraphDocument(nodes=[Node(id="Amico's Family", type='Family'), Node(id='The Story of Amico’s Family: A Legacy of Love and Tradition', type='Text')], relationships=[Relationship(source=Node(id="Amico's Family", type='Family'), target=Node(id='The Story of Amico’s Family: A Legacy of Love and Tradition', type='Text'), type='IS_DESCRIBED_IN')], source=Document(metadata={'source': 'dummytext.txt', 'id': '03df7629bb2ac3c69a29fff4d805bae7'}, page_content='1. The Story of Amicoâ€™s Family: A Legacy of Love and Tradition')),
 GraphDocument(nodes=[Node(id='Santa Caterina', type='Location'), Node(id='Sicily', type='Region'), Node(id="Sicily's culinary tapestry", type='Cultural Heritage'), Node(id='Caruso family', type='Family')], relationships=[Relationship(source=Node(id='Santa Caterina', type='Location'), target=Node(id='Sicily', type='Region'), type='LOCATED_IN'), Relationship(source=Node(id='Caruso family', type='Family'), target=Node(id='Santa Caterina', type='Location'), type='ORIGINATES_

In [17]:
def showGraph():
    driver = GraphDatabase.driver(
        uri = os.environ["NEO4J_URI"],
        auth = (os.environ["NEO4J_USERNAME"],
                os.environ["NEO4J_PASSWORD"]))
    session = driver.session()
    widget = GraphWidget(graph = session.run("MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t").graph())
    widget.node_label_mapping = 'id'
    return widget

showGraph()

GraphWidget(layout=Layout(height='800px', width='100%'))

In [18]:
from langchain_community.embeddings import OllamaEmbeddings

In [19]:
def get_embedding_function():
    embeddings = OllamaEmbeddings(model="llama3.1")
    return embeddings

In [20]:
import llama_cpp
import instructor

from llama_cpp.llama_speculative import LlamaPromptLookupDecoding
from pydantic import BaseModel

In [21]:
vector_index = Neo4jVector.from_existing_graph(
    get_embedding_function(),
    
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)
vector_retriever = vector_index.as_retriever()


In [22]:
import multiprocessing
from langchain_community.chat_models import ChatLlamaCpp

In [23]:
local_model = r"C:\Users\ADMIN\Downloads\Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"

In [24]:
llama = llama_cpp.Llama(
    model_path=local_model,
    n_gpu_layers=-1,
    chat_format="chatml",
    n_ctx=2048,
    
    logits_all=True,
    verbose=False,
)


create = instructor.patch(
    create=llama.create_chat_completion_openai_v1,
    mode=instructor.Mode.JSON_SCHEMA, 
)
class Entities(BaseModel):
    """Identifying information about entities."""

    names: list[str] = Field(
        ...,
        description="All the person, organization, or business entities that "
        "appear in the text")
def llama_generated_querry(question):

    user = create(
    messages=[
        {
            "role": "user",
            "content": f"You are extracting organization and person entities from the text {question}",
        }
    ],
    response_model=Entities,
    )

    return user

In [25]:
llama_generated_querry('Who are Nonna Lucia and Giovanni Caruso?')



Entities(names=['Nonna Lucia', 'Giovanni Caruso'])

In [26]:
graph.query("CREATE FULLTEXT INDEX entity IF NOT EXISTS FOR (e:__Entity__) ON EACH [e.id]")

[]

In [27]:
def generate_full_text_query(input: str) -> str:
    words = [el for el in remove_lucene_chars(input).split() if el]
    if not words:
        return ""
    full_text_query = " AND ".join([f"{word}~2" for word in words])
    print(f"Generated Query: {full_text_query}")
    return full_text_query.strip()


# Fulltext index query
def graph_retriever(question: str) -> str:
    """
    Collects the neighborhood of entities mentioned
    in the question
    """
    result = ""
    #entities = entity_chain.invoke({"question": question})
    entities = llama_generated_querry(question)
    for entity in entities.names:
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": generate_full_text_query(entity)},
        )
        result += "\n".join([el['output'] for el in response])
    return result

In [34]:
print(graph_retriever('Who are Nonna Lucia and Giovanni Caruso?'))



Generated Query: Nonna~2 AND Lucia~2
Generated Query: Giovanni~2 AND Caruso~2
Nonna Lucia - INFLUENCED -> Amico
Nonna Lucia - TAUGHT -> her grandchildren
Nonna Lucia - TAUGHT -> Amico
Nonna Lucia - HAS_TITLE -> The Matriarch and Mentor
Nonna Lucia - IS_MOTHER_OF -> Matriarch
Nonna Lucia - IS_MENTOR_TO -> Mentor
Amico - LEARNED -> Nonna Lucia's cooking techniquesGiovanni Caruso - PRODUCES -> olives and grapes
Giovanni Caruso - WORKS_AS -> farmer
Giovanni Caruso - LIVES_IN -> Santa Caterina
Giovanni Caruso - IS_DESCENDANT_OF -> Amico
Giovanni Caruso - FAMILY_MEMBER_OF -> Maria


In [29]:
def full_retriever(question: str):
    graph_data = llama_generated_querry(question)
    vector_data = [el.page_content for el in vector_retriever.invoke(question)]
    final_data = f"""Graph data:
{graph_data}
vector data:
{"#Document ". join(vector_data)}
    """
    return final_data

In [31]:
print(full_retriever('Who are Nonna Lucia and Giovanni Caruso?'))



Graph data:
names=['Nonna Lucia', 'Giovanni Caruso']
vector data:

text: Giovanni Caruso and Maria: The Founding Generation#Document 
text: Nonna Lucia: The Matriarch and Mentor#Document 
text: their belief in the power of food to connect people and make a positive impact. The Caruso family's journey is a testament to the enduring power of tradition, innovation, and the joy of sharing good food with others.#Document 
text: live classical music, made it a popular destination. Antonio's ability to blend tradition with modernity, a skill he had honed from his father Giovanni, made "La Dolce Vita" a culinary landmark in Rome.
    


In [32]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
Use natural language and be concise.
Answer:"""
prompt = ChatPromptTemplate.from_template(template)

chain = (
        {
            "context": full_retriever,
            "question": RunnablePassthrough(),
        }
    | prompt
    | llm
    | StrOutputParser()
)

In [33]:
chain.invoke("tell me a brief about Nonna")



'Nonna is likely the Italian word for "grandmother". In this context, it appears to refer to Lucia, who is described as the "Matriarch and Mentor" in the provided graph data.'