In [None]:
%pip install --upgrade --quiet  langchain langchain-community langchain-ollama langchain-experimental neo4j tiktoken yfiles_jupyter_graphs python-dotenv json-repair langchain-openai langchain_core

In [None]:
from langchain_core.runnables import  RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain_core.output_parsers import StrOutputParser
from langchain_community.graphs import Neo4jGraph
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOllama
from langchain_experimental.graph_transformers import LLMGraphTransformer
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget
from langchain_community.vectorstores import Neo4jVector
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_ollama import OllamaEmbeddings
import os
from langchain_experimental.llms.ollama_functions import OllamaFunctions
from neo4j import  Driver

from dotenv import load_dotenv

load_dotenv()

In [None]:
graph = Neo4jGraph()

In [None]:
loader = TextLoader(file_path="dum.txt") #this is the text that'll be read
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=256, chunk_overlap=75)
documents = text_splitter.split_documents(documents=docs)

In [None]:
llm = OllamaFunctions(model="deepseek-r1:8b", temperature=0, format="json")
llm_transformer = LLMGraphTransformer(llm=llm)

In [None]:
graph_documents = llm_transformer.convert_to_graph_documents(documents)

In [None]:
print(graph_documents)

In [None]:
for node in graph_documents[0].nodes:
    print(node)

In [None]:
for relationship in graph_documents[0].relationships:
    print(relationship)

In [None]:
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True
)

In [None]:
embeddings = OllamaEmbeddings(
    model="nomic-embed-text",
)

In [None]:
test_docs = TextLoader(file_path="dum.txt").load() #same as above, diff chunk_size lang kasi di kaya ng machine ko

test_documents = RecursiveCharacterTextSplitter(chunk_size=256, chunk_overlap=75).split_documents(documents=test_docs)

In [26]:
#embeddings
#test vector store
db = Neo4jVector.from_documents(
    test_documents, embeddings, url= os.environ["NEO4J_URI"],username=os.environ["NEO4J_USERNAME"], password= os.environ["NEO4J_PASSWORD"],embedding_node_property="vector",  
)


ValueError: Index with name vector already exists.The provided embedding function and vector index dimensions do not match.
Embedding function dimension: 768
Vector index dimension: 384

In [None]:
# Now we initialize from existing graph
existing_graph = Neo4jVector.from_existing_graph(
    embedding= embeddings,
    url=os.environ["NEO4J_URI"],
    username=os.environ["NEO4J_USERNAME"],
    password=os.environ["NEO4J_PASSWORD"],
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding",
)
# result = existing_graph.similarity_search("who are the main characters of the story and what are their role")

In [None]:
print(result)

In [None]:
query = "What is the nature of the relationship between the river and the people of Rivershade? How does the town's dependence on the river influence its culture, history, and collective psyche?"
test = db.similarity_search_with_score(query, k=10)

print(test)

for doc, score in test:
        print("-" * 80)
        print("Score: ", score)
        print(doc.page_content)
        print("-" * 80)

In [None]:
driver = GraphDatabase.driver(
        uri = os.environ["NEO4J_URI"],
        auth = (os.environ["NEO4J_USERNAME"],
                os.environ["NEO4J_PASSWORD"]))

def create_fulltext_index(tx):
    query = '''
    CREATE FULLTEXT INDEX `fulltext_entity_id` 
    FOR (n:__Entity__) 
    ON EACH [n.id];
    '''
    tx.run(query)

# Function to execute the query
def create_index():
    with driver.session() as session:
        session.execute_write(create_fulltext_index)
        print("Fulltext index created successfully.")

# Call the function to create the index
try:
    create_index()
except:
    pass

# Close the driver connection
# driver.close()

In [None]:
#create entities or the subj on your query e.g "who is maria?" possible entity is maria then 'Maria' will be search thru the graph DB
class Entities(BaseModel):
    """Identifying information about entities."""

    names: list[str] = Field(
        ...,
        description=""" Extract all the possible subject in the text""",
    )

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are extracting organization, person or the subject as entities from the text.",
        ),
        (
            "human",
            "Use the given format to extract information from the following "
            "input: {question}",
        ),
    ]
)
llm_trans= OllamaFunctions(model="mistral", temperature=0, format="json")
entity_chain = llm_trans.with_structured_output(Entities)

In [15]:
chain = entity_chain.invoke("who is Sophia?")

In [16]:
for entity in chain.names:
    print(f'"{entity}"')

print(chain)

"Sophia"
names=['Sophia']


In [17]:
def get_people(tx, name):
    query = "MATCH (p:__Entity__) WHERE p.id="  + f'"{name}"'  + " RETURN p"
    print(query, name)
    result = tx.run(query)
    return [record["name"] for record in result]

with driver.session() as session:
    for entity in chain.names:
        people = session.execute_read(get_people, entity)
        print("People in Graph:", people)

MATCH (p:__Entity__) WHERE p.id="Sophia" RETURN p Sophia


KeyError: 'name'

In [18]:
def graph_retriever(question: str) -> str:
    """
    Collects the neighborhood of entities mentioned
    in the question
    """
    result = ""
    entities = entity_chain.invoke(question)
    print(entities)
    for entity in entities.names:
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('fulltext_entity_id', $query, {limit:2})
                YIELD node, score
                CALL {
                WITH node
                MATCH (node)-[r:!MENTIONS]->(neighbor)
                RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
                UNION ALL
                WITH node
                MATCH (node)<-[r:!MENTIONS]-(neighbor)
                RETURN neighbor.id + ' - ' + type(r) + ' -> ' + node.id AS output
                }
                RETURN output LIMIT 50
            """,
            {"query": entity},
        )
        result += "\n".join([el['output'] for el in response])
    return result

In [19]:
print(graph_retriever("who is sophia?"))

names=['sophia']




Sophia - OCCUPATION -> journalist
Sophia - ARRIVES_IN -> Rivershade
Sophia - WRITES_ABOUT -> town’s history
Sophia - INVESTIGATES -> missing people
Sophia - INVESTIGATES -> Emmett
Sophia - MEETS -> Emmett
e0daafd6d0fb05cf41942aa89b8489b0a5a4f253 - HAS_ENTITY -> Sophia
5613a75b948ed19a8ac7ca53c32354a236e1d861 - HAS_ENTITY -> Sophia


In [20]:
def full_retriever(question: str):
    print(question)
    graph_data = graph_retriever(question)
    print(graph_data)
    docs_with_score = db.similarity_search_with_score(question, k=5)
    page_contents = [doc[0].page_content for doc in docs_with_score]
    print(page_contents)

    final_data = f"""Relationships:
{graph_data}
facts:
{page_contents}
    """
    return final_data

In [21]:
# combination of embedding and graph search
template = """
Answer the question based only on the following context:
{context}

You are an advanced AI designed to analyze and synthesize information from a single provided file: {context} containing both factual details and relationship connections about a subject. 
When answering a question, do not simply extract text but instead interpret and expand upon the provided information by logically inferring connections and implications. 
Identify key facts, analyze relationships, and generate well-structured responses that go beyond surface-level details while maintaining accuracy and coherence. 
Use contextual reasoning to provide insightful and relevant answers. If the required information is not found, acknowledge the limitation while avoiding speculation. 
Always maintain a neutral, well-supported, and logically sound tone in your responses.

when answering a question be confident and make it so like you're the one answering the question based from your knowldege not from external source 
NEVER mention about the data source or where you get the data
when answering a question, answer it fully 

Question: {question}
Use natural human language 


Answer:"""
llm2 = OllamaFunctions(model="mistral", temperature=1, format="json")
prompt = ChatPromptTemplate.from_template(template)

chain = (
        {   
            "context": full_retriever,
            "question": RunnablePassthrough(),
        }
    | prompt
    | llm2
    | StrOutputParser()
)

In [22]:
#ask a question
chain.invoke(input="How will Eleanor, Margaret, and the others work together to uncover the town's secrets?")

How will Eleanor, Margaret, and the others work together to uncover the town's secrets?
names=['Eleanor', 'Margaret', 'town']




Eleanor - WORKS_AT -> Rivershade Public Library
Eleanor - DECIDES_TO -> investigate the mysterious message
Eleanor - DISCOVERED -> letter
Eleanor - SHARED_FINDINGS_WITH -> Margaret
Eleanor - AGREES_TO_MEET -> Graham
Eleanor - AGREES_TO_MEET -> Clara
Eleanor - AGREES_TO_MEET -> the others
e685839f981c6f91930866f98662b07c56d45dba - HAS_ENTITY -> Eleanor
b8272706fa87be6ed56470a6c78f978e38dfc019 - HAS_ENTITY -> Eleanor
5613a75b948ed19a8ac7ca53c32354a236e1d861 - HAS_ENTITY -> Eleanor
cbf989bf8a55cfd4d13dcd7507f8c8de1b345e25 - HAS_ENTITY -> EleanorMargaret - HAS_ROLE -> Mayor
Margaret - HAS_ROLE -> mayor
Margaret - HAS_SUSPICION_ABOUT -> town’s historic preservation project
Eleanor - SHARED_FINDINGS_WITH -> Margaret
e685839f981c6f91930866f98662b07c56d45dba - HAS_ENTITY -> Margaret
b8272706fa87be6ed56470a6c78f978e38dfc019 - HAS_ENTITY -> Margaret
cbf989bf8a55cfd4d13dcd7507f8c8de1b345e25 - HAS_ENTITY -> MargaretThe group - GATHERS_AT -> town hall
cbf989bf8a55cfd4d13dcd7507f8c8de1b345e25 - HAS_

NameError: name 'db' is not defined