In [1]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import ConfigurableField
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_ollama.llms import OllamaLLM
from typing import Tuple, List, Optional
from yfiles_jupyter_graphs import GraphWidget
from neo4j import GraphDatabase
from langchain.chains import GraphCypherQAChain

from neo4j import GraphDatabase
from langchain_community.vectorstores import Neo4jVector
from langchain_community.graphs import Neo4jGraph

import pickle
import os

In [2]:
# OPENAI_API_KEY = 'sk-XwwGmbuN8PZ2IX5CAkejMXIrPpsF4c435PEGWu0NIbT3BlbkFJAW1uxKGNBG1DyPZR43e06JGPrCvPrWKYmbDlTxzMoA'

In [3]:
NEO4J_URI="neo4j+s://9269fc71.databases.neo4j.io"
NEO4J_USERNAME="neo4j"
NEO4J_PASSWORD="lOnpSSiAFQg0uWzA1EvYYBhK45_rWbxXb9SHRDwZRfk"

In [5]:
graph = Neo4jGraph()

In [4]:
# os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
os.environ["NEO4J_URI"] = NEO4J_URI
os.environ["NEO4J_USERNAME"] = NEO4J_USERNAME
os.environ["NEO4J_PASSWORD"] = NEO4J_PASSWORD

In [5]:

# Load the cleaned_documents list from the file
with open('final_pdf_pages.pkl', 'rb') as file:
    docs = pickle.load(file)

print("Documents loaded successfully!")


Documents loaded successfully!


In [6]:
# docs[0]

In [7]:
len(docs)

220

In [8]:
text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    separators=['\n'],
    chunk_size=2024,
    chunk_overlap=204,
    length_function=len,
)

In [9]:
data = text_splitter.split_documents(docs)

In [10]:
len(data)

615

In [11]:
# print(data[0].page_content)

In [12]:
type(data[0])

langchain_core.documents.base.Document

In [26]:
# # Saving Chunked data
# import pickle

# # Save the cleaned_documents list to a file
# with open('chunked_documents.pkl', 'wb') as file:
#     pickle.dump(cleaned_documents, file)

# print("Documents saved successfully!")

In [11]:
llm = OllamaLLM(model="llama3.2")

In [47]:
llm_transformer = LLMGraphTransformer(llm=llm)

In [48]:
graph_documents = llm_transformer.convert_to_graph_documents(data)

In [None]:
graph_documents

In [50]:
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True
)

In [6]:
graph_documents = Neo4jGraph(url=os.environ["NEO4J_URI"], username=os.environ["NEO4J_USERNAME"], password=os.environ["NEO4J_PASSWORD"])

In [7]:
graph_documents

<langchain_community.graphs.neo4j_graph.Neo4jGraph at 0x17bb4e190d0>

In [27]:
cypher_generation_template = """
You are an expert Neo4j Cypher translator who converts English to Cypher based on the Neo4j Schema provided, 
specifically for queries related to gastrointestinal and liver diseases. Follow the instructions below:

1. Generate Cypher query compatible ONLY for Neo4j Version 5.
2. Do not use EXISTS, SIZE, HAVING keywords in the Cypher. Use aliases when using the WITH keyword.
3. Use only Nodes and Relationships mentioned in the schema.
4. Always do a case-insensitive and fuzzy search for properties. For example:
   - To search for a Disease, use `toLower(disease.name) contains 'ulcer'`.
   - To search for Symptoms, use `toLower(symptom.name) contains 'pain'`.
5. Never use nodes, properties, or relationships not defined in the given schema.
6. Ensure query logic is precise for medical data retrieval, such as retrieving causes, symptoms, treatments, risk factors, complications, or diagnostic tests.

### Medical Schema (use this schema exclusively):
schema: {schema}

Examples:
Question: What are common symptoms of peptic ulcer disease?
Answer:
```cypher
MATCH (d:Disease)-[:HAS_SYMPTOM]->(s:Symptom)
WHERE toLower(d.name) contains 'peptic ulcer'
RETURN s.name AS Symptom
Question: What treatments are available for gastric ulcer? 
Answer: '''MATCH (d:Disease)-[:TREATED_WITH]->(t:Treatment)
WHERE toLower(d.name) contains 'gastric ulcer'
RETURN t.name AS Treatment'''
Question: Which complications are associated with Crohn's disease?
Answer: '''MATCH (d:Disease)-[:COMPLICATES]->(c:Complication)
WHERE toLower(d.name) contains 'crohn'
RETURN c.name AS Complication'''
"""

In [28]:
cypher_prompt = PromptTemplate(
    template = cypher_generation_template,
    input_variables = ["schema", "question"]
)

In [29]:
CYPHER_QA_TEMPLATE = """You are an assistant that helps to form nice and human understandable answers.
The information part contains the provided information that you must use to construct an answer.
The provided information is authoritative, you must never doubt it or try to use your internal knowledge to correct it.
Make the answer sound as a response to the question. Do not mention that you based the result on the given information.
If the provided information is empty, say that you don't know the answer.
Final answer should be easily readable and structured.
Information:
{context}

Question: {question}
Helpful Answer:"""

In [30]:
qa_prompt = PromptTemplate(
    input_variables=["context", "question"], template=CYPHER_QA_TEMPLATE
)


In [35]:
def query_graph(user_input):
    graph = Neo4jGraph(url=os.environ["NEO4J_URI"], username=os.environ["NEO4J_USERNAME"], password=os.environ["NEO4J_PASSWORD"])
    chain = GraphCypherQAChain.from_llm(
        llm=llm,
        graph=graph,
        verbose=True,
        return_intermediate_steps=True,
        cypher_prompt=cypher_prompt,
        qa_prompt=qa_prompt,
        allow_dangerous_requests=True
        )
    result = chain(user_input)
    return result

In [36]:
user_input = 'what are the common treatments for gastric ulcers?'

In [37]:
result = query_graph(user_input)

  result = chain(user_input)




[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (d:Disease)-[:HAS_SYMPTOM]->(s:Symptom)
WHERE toLower(d.name) contains 'peptic ulcer'
RETURN s.name AS Symptom
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


In [40]:
print(result['result'])

Common treatments for gastric ulcers typically include:

* Antacids or acid reducers to neutralize stomach acid
* H2 blockers, such as ranitidine (Zantac) or famotidine (Pepcid), to decrease acid production
* Proton pump inhibitors (PPIs), like omeprazole (Prilosec) or esomeprazole (Nexium), to block the production of stomach acid

In some cases, medications may be prescribed in combination to achieve optimal results. Additionally, lifestyle modifications such as maintaining a healthy diet and avoiding trigger foods can help alleviate symptoms.

It's essential to consult a healthcare professional for personalized treatment recommendations, as they will determine the best course of action based on individual needs and medical history.
