In [1]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_ollama.llms import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.pydantic_v1 import BaseModel, Field
import pickle
import os
from neo4j import GraphDatabase
from langchain_community.vectorstores import Neo4jVector

from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.prompts import ChatMessagePromptTemplate, PromptTemplate

from typing import Tuple, List, Optional

from langchain_ollama import ChatOllama
import torch

import warnings
warnings.simplefilter('ignore')


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [2]:
# Neo4j connection settings come from the process environment so that no
# credential is stored in the notebook. Export NEO4J_URI and NEO4J_PASSWORD
# (and optionally NEO4J_USERNAME) before starting the kernel.
# SECURITY: a password was previously committed in this cell; treat it as
# compromised and rotate it.
def _require_env(name: str) -> str:
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or blank.
    """
    value = os.environ.get(name, "")
    if not value.strip():
        raise RuntimeError(
            f"Environment variable {name} is not set; export it before running this notebook."
        )
    return value


NEO4J_URI = _require_env("NEO4J_URI")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")  # same user name as before when unset
NEO4J_PASSWORD = _require_env("NEO4J_PASSWORD")

In [3]:
# Publish the Neo4j connection settings through the process environment.
os.environ.update(
    {
        "NEO4J_URI": NEO4J_URI,
        "NEO4J_USERNAME": NEO4J_USERNAME,
        "NEO4J_PASSWORD": NEO4J_PASSWORD,
    }
)

In [4]:
# Graph handle used by `structured_retriever`. No credentials are passed here;
# presumably they are picked up from the NEO4J_* environment variables — confirm
# against the Neo4jGraph documentation.
graph = Neo4jGraph()

In [5]:
# Second graph handle, this time with the connection details passed explicitly.
# NOTE(review): none of the cells shown here read `graph_documents` afterwards.
graph_documents = Neo4jGraph(
    url=os.environ["NEO4J_URI"],
    username=os.environ["NEO4J_USERNAME"],
    password=os.environ["NEO4J_PASSWORD"],
)

In [6]:
# embedding_device = torch.device("cuda:0")  # GPU 0 for embedding model
# llm_device = torch.device("cuda:1")

In [7]:
# embedding_device

In [8]:
# Sentence-embedding model behind the vector index; the device is fixed to "cpu".
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = dict(device="cpu")
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs=model_kwargs,
)

In [9]:
# Vector index built on the `Document` nodes already stored in the graph:
# the `text` property is embedded and the vector is kept in `embedding`.
vector_index = Neo4jVector.from_existing_graph(
    embeddings,
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding",
    search_type="hybrid",
)

In [10]:
# Chat model shared by the query-variation, entity-extraction and answer chains.
llm = ChatOllama(
    model="llama3.2",
    temperature=0,
    num_gpu=1,
)

In [51]:
# llm = OllamaLLM(model="llama3.2")

In [11]:
# System instructions for the query-variation step. The text contains no
# `{placeholder}`; the user's question is supplied by the separate human message
# of the prompt that wraps this template.
fusion_template = """
        Task: You are an assistant that generates multiple variations of a given question. 
        For each variation, maintain the original intent of the question, but change the phrasing, structure, 
        or tone to create a diverse set of queries.

Generate 5-7 variations that cover:

Synonym replacements while keeping the question concise.
Alternative structures, such as rephrasing into "why," "how," or "what" forms if relevant.
Casual and formal tones.
Slightly more specific or broader wording.
Examples:

Original Question: "How is a gastric ulcer diagnosed?"
Variations:
    "What tests can be used to detect stomach ulcers?"
    "Why are endoscopies commonly used in gastric ulcer diagnosis?"
    "How do doctors figure out if someone has a stomach ulcer?"
    "Can an upper endoscopy be used to diagnose all types of gastric ulcers, or are there other methods more effective for certain cases?"
    "What tests and procedures are typically involved in diagnosing gastric ulcers, including acid reflux disease?"
    "How can a patient determine if they have been diagnosed with a stomach ulcer based on symptoms, test results, or imaging studies?"
    "When considering the diagnosis of a gastric ulcer, what role does symptom severity play in determining the need for further testing or treatment?"

Now generate just the list of variations one on each line for the given question and nothing else
        """

In [12]:
# Prompt for the query-variation step: fixed system instructions followed by a
# human message that carries the user's question.
# The only template variable is `question`. It is declared under that name so the
# declaration matches the `{question}` placeholder (it used to say
# `original_query`, which no placeholder uses).
prompt = ChatPromptTemplate(
    input_variables=["question"],
    messages=[
        SystemMessagePromptTemplate(
            prompt=PromptTemplate(input_variables=[], template=fusion_template)
        ),
        HumanMessagePromptTemplate(
            prompt=PromptTemplate(
                input_variables=["question"],
                template="Generate multiple search queries related to: {question} \n OUTPUT (5 queries):",
            )
        ),
    ],
)
     

In [13]:
# Example question kept at module level for ad-hoc experiments.
original_query = "How do NSAIDs induce gastric mucosal injury leading to ulcers?"

In [14]:
# Query-variation chain: prompt -> chat model -> plain text -> list of queries.
# The model is asked for one variation per line. Blank or whitespace-only lines
# are dropped and each query is stripped, so no empty query reaches the
# retrievers downstream.
generate_queries = (
    prompt
    | llm
    | StrOutputParser()
    | (lambda text: [line.strip() for line in text.splitlines() if line.strip()])
)

In [15]:
# Smoke test of the query-variation chain; a bare string is passed instead of a
# dict of prompt variables.
generate_queries.invoke('What causes gastric ulcers?')

['What triggers stomach ulcers?',
 'How do gastric ulcers form in the body?',
 'What are the common causes of stomach ulcers?',
 'Can stress contribute to the development of gastric ulcers?',
 'What medical conditions increase the risk of developing a gastric ulcer?']

In [65]:
# Define the schema without validators to avoid deepcopy issues
# NOTE(review): the class docstring and the field description are runtime text;
# presumably they are forwarded to the model through
# `llm.with_structured_output(Entities)` — confirm before rewording them.
# NOTE(review): the recorded output of `entity_chain.invoke(...)` in this notebook
# contains category labels ('Disease', 'Symptom', 'Cause') next to real mentions,
# so the description's list of categories may need tightening.
class Entities(BaseModel):
    """Identifying information about entities."""

    # Entity strings extracted from the input text; required field (`...`).
    nodes: List[str] = Field(
        ...,
        description=(
            "All relevant entities appearing in the text, such as Disease, Cause, Symptom, Complication, Treatment, "
            "Medication, Test, Risk Factor, Diagnosis, Side Effect, Procedure, Condition Stage, Disease Type, and Imaging Type. "
            "Extract at least one entity, even if the question is not explicit."
        ),
    )

In [132]:
# Prompt for extracting search entities (nouns and main verbs) from a question.
# NOTE: this rebinds `prompt`, the same name the query-variation cell uses;
# `generate_queries` has to be built before this cell runs.
# The system text is assembled from adjacent string literals, so every fragment
# ends with its own punctuation and a trailing space; without them the sentences
# run together.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are tasked with extracting the diseases, conditions, Diagnosis or symptoms from the given question. "
            "Consider to extract the nouns and important action verbs in the sentence like causes, contribute, diagnosed, used, detect, work, treat, suspect, bleeding, treated, relate, influence, induce, leading, diagnose, differentiate. "
            "For example, for the question: How is a gastric ulcer diagnosed?, you are going to extract all the nouns and main verbs: the nouns are gastric ulcer and the main verb is diagnosed. "
            "Ignore all the auxiliary verbs.",
        ),
        (
            "human",
            "Use the specified format to extract information from the following input: {question}.",
        ),
    ]
)

In [133]:
# Entity-extraction chain whose result is parsed into an `Entities` instance.
# `prompt` must be the entity-extraction prompt here, not the query-variation
# prompt that another cell binds to the same name.
entity_chain = prompt | llm.with_structured_output(Entities)

In [134]:
# Smoke test of the entity extractor on a single question.
entity_chain.invoke({"question": "How do NSAIDs induce gastric mucosal injury leading to ulcers?"})

Entities(nodes=['Disease', 'Gastric Mucosa', 'Symptom', 'Ulcers', 'Cause', 'NSAIDs'])

In [111]:
def generate_full_text_query(input: str) -> str:
    """Build a full-text query that requires every word of ``input``.

    Lucene special characters are removed with ``remove_lucene_chars``. Each
    remaining whitespace-separated word gets the ``~2`` suffix and the words are
    combined with ``AND``.

    Args:
        input: Free text, typically one extracted entity name.

    Returns:
        The query string, e.g. ``"gastric~2 AND ulcer~2"``, or ``""`` when no
        word is left after sanitising (previously an IndexError).
    """
    words = remove_lucene_chars(input).split()
    # Joining an empty list yields "", which covers blank or symbol-only input.
    return " AND ".join(f"{word}~2" for word in words)

In [81]:
# Fulltext index query
def structured_retriever(question: str) -> str:
    """Collect graph relationships around the entities mentioned in ``question``.

    Entities are extracted with ``entity_chain``. Each one is looked up in the
    ``entity`` full-text index, and every non-``MENTIONS`` relationship of the
    matched nodes (both directions) is rendered as ``source - TYPE -> target``.

    Args:
        question: Natural-language question to extract entities from.

    Returns:
        One relationship per line, or ``""`` when nothing was found.
    """
    entities = entity_chain.invoke({"question": question})
    # Tolerate a missing or empty extraction result instead of failing on `.nodes`.
    entity_names = getattr(entities, "nodes", None) or []

    relationships = []
    for entity in entity_names:
        full_text_query = generate_full_text_query(entity)
        if not full_text_query:
            # Nothing searchable is left for this entity; skip the lookup.
            continue
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entity', $query)
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output
            """,
            {"query": full_text_query},
        )
        # Rows whose `output` is null cannot be joined as text; leave them out.
        relationships.extend(el["output"] for el in response if el["output"] is not None)

    # Join once at the end so rows of different entities never share a line.
    return "\n".join(relationships)

In [None]:
# Smoke test: print the relationship lines retrieved for one question.
print(structured_retriever("How do NSAIDs induce gastric mucosal injury leading to ulcers?"))

In [83]:
def combined_retriever(queries):
    """Run graph and vector retrieval for every query.

    Returns a list with one dict per query, in input order:
    ``"structured"`` holds the text returned by ``structured_retriever`` and
    ``"unstructured"`` holds the page contents of the similarity-search hits
    joined with ``"#Document "``.
    """

    def _retrieve_one(query):
        graph_facts = structured_retriever(query)
        passages = vector_index.similarity_search(query)
        return {
            "structured": graph_facts,
            "unstructured": "#Document ".join(doc.page_content for doc in passages),
        }

    return [_retrieve_one(query) for query in queries]

In [136]:
# Combining results and generating the final response
def combined_fusion_graph_rag(original_query):
    """Answer ``original_query`` with query fusion over graph and vector retrieval.

    Steps: generate query variations, retrieve structured and unstructured
    context for each of them, merge everything into one context string and ask
    the chat model for the final answer.

    Args:
        original_query: The user's question (may include numbered answer options).

    Returns:
        The model's answer as plain text.
    """
    # Step 1: Expand the question into several search queries
    queries = generate_queries.invoke(original_query)

    # Step 2: Retrieve data for each query
    retrieved_data = combined_retriever(queries)

    # Step 3: Combine retrieved results
    sections = []
    for result in retrieved_data:
        unstructured = result["unstructured"]
        # `combined_retriever` already returns one joined string. Joining that
        # string again would put a space between every character, so only join
        # when a list of passages is supplied.
        if not isinstance(unstructured, str):
            unstructured = " ".join(unstructured)
        sections.append(
            f"Structured:\n{result['structured']}\nUnstructured:\n{unstructured}"
        )
    combined_context = "\n".join(sections)

    # Step 4: Generate final response
    answer_template = """Answer the question based only on the following context, which is extracted from a knowledge graph. You have the freedom to interpret the context however you want. At the end, you have to mention the correct option.
    Also do not mention that you got the answer from the context, which means do not mention anything like "the text mentioned" and similar things. context:
    {context}

    Question: {question}
    """
    final_prompt = ChatPromptTemplate.from_template(answer_template)
    chain = final_prompt | llm | StrOutputParser()
    return chain.invoke({"context": combined_context, "question": original_query})

In [85]:
# Build the combined retrieval context only (no answer generation)
def combined_fusion_graph_rag_context(original_query):
    """Return the merged retrieval context for ``original_query``.

    Generates query variations, retrieves structured and unstructured context
    for each of them and concatenates the results. No answer is generated.

    Args:
        original_query: The user's question.

    Returns:
        One ``Structured:`` / ``Unstructured:`` section per generated query,
        separated by newlines.
    """
    # Step 1: Expand the question into several search queries
    queries = generate_queries.invoke(original_query)

    # Step 2: Retrieve data for each query
    retrieved_data = combined_retriever(queries)

    # Step 3: Combine retrieved results
    sections = []
    for result in retrieved_data:
        unstructured = result["unstructured"]
        # The retriever returns one joined string. Joining a string again would
        # insert a space between every character, so only join real lists.
        if not isinstance(unstructured, str):
            unstructured = " ".join(unstructured)
        sections.append(
            f"Structured:\n{result['structured']}\nUnstructured:\n{unstructured}"
        )
    return "\n".join(sections)

In [86]:
# answers = []

In [125]:
# Single multiple-choice question used for a one-off run; the text is identical
# to the first entry of `questions`.
sample = "The triple therapy consisting of PPI, clarithromycin, and amoxicillin was administered as an eradication treatment for H. pylori. Which of the following tests is the most appropriate for determining the success of eradication?\n1. H. pylori culture test\n2. H. pylori serology test\n3. Urea breath test\n4. Endoscopic biopsy\n5. Rapid urease test"

In [None]:
%%time
# Run the full pipeline once on the single sample question.
final_result = combined_fusion_graph_rag(sample)
# context = combined_fusion_graph_rag_context(sample)
# print(context)
# answers.append(final_result)

In [127]:
# print() is used so the line breaks in the answer are rendered.
print(final_result)

The correct answer is:

3. Urea breath test

Explanation:
The urea breath test (UBT) is the most appropriate test for determining the success of eradication treatment for H. pylori, especially in cases where clarithromycin resistance is suspected or confirmed.

Here's why:

* The UBT measures the level of carbon-13 labeled urea in the breath, which is produced when urease breaks down urea into ammonia and carbon dioxide. If H. pylori is present, it will produce urease and increase the levels of carbon-13 labeled urea in the breath.
* The test is non-invasive and does not require endoscopy or biopsy, making it a convenient and cost-effective option for monitoring treatment success.
* The UBT can detect active infection even when the patient has completed eradication therapy, whereas serology tests may remain positive for months or years after successful treatment.
* The test is also useful for detecting clarithromycin resistance, as it can distinguish between strains that are susceptibl

In [118]:
# Snapshot of the answers collected so far.
# `answers` is first assigned in a later cell, so on a top-to-bottom run it does
# not exist yet; fall back to an empty list instead of raising NameError.
# list(...) makes a real copy rather than a second name for the same list.
try:
    copy_answer = list(answers)
except NameError:
    copy_answer = []

In [137]:
# Evaluation set of multiple-choice questions about gastric ulcers. Each entry is
# the question stem followed by numbered options separated by "\n".
# The first entry has the same text as `sample`; several clinical vignettes
# appear twice, each time with a different sub-question and option list.
questions = [
    "The triple therapy consisting of PPI, clarithromycin, and amoxicillin was administered as an eradication treatment for H. pylori. Which of the following tests is the most appropriate for determining the success of eradication?\n1. H. pylori culture test\n2. H. pylori serology test\n3. Urea breath test\n4. Endoscopic biopsy\n5. Rapid urease test",
    "A 40-year-old female patient presented with epigastric pain that started two weeks ago and intermittent dizziness that began a week ago. Her medical history includes taking painkillers (aceclofenac) for knee arthritis and anticoagulants for deep vein thrombosis. She also has a recent history of antibiotic use for bronchitis. Abdominal physical examination showed no abnormalities, but the rectal examination was positive. Vital signs were stable. Blood tests revealed a hemoglobin level of 8 g/dL. Gastroscopy did not show active bleeding, but an H1 ulcer was found in the antrum, and the rapid urease test was negative. Considering the patient needs to continue pain relief due to severe arthritis pain, what is the most effective treatment in this situation?\n1. Co-administration of an H2 receptor antagonist\n2. Switching to another COX-1 inhibitor and administering misoprostol\n3. Switching to a COX-2 inhibitor\n4. Switching to a COX-2 inhibitor and administering a PPI\n5. Switching to another COX-1 inhibitor",
    "What is the most common cause of gastric ulcers?\n1. Helicobacter pylori infection\n2. Stress\n3. Alcohol consumption\n4. Smoking",
    "Which of the following is NOT a major symptom of gastric ulcers?\n1. Heartburn\n2. Weight loss\n3. Cough\n4. Vomiting",
    "What is the most useful test for diagnosing gastric ulcers?\n1. Blood test\n2. Gastroscopy\n3. Ultrasound\n4. X-ray",
    "Which non-invasive test is appropriate for diagnosing Helicobacter pylori infection?\n1. Blood test\n2. Histology\n3. Urea breath test\n4. Gastroscopy",
    "Which of the following is NOT used in the treatment of gastric ulcers?\n1. PPI (Proton Pump Inhibitor)\n2. H2 receptor antagonist\n3. Antibiotics\n4. Insulin",
    "What is the recommended dietary habit for patients with gastric ulcers?\n1. Eating spicy foods\n2. Regular meals\n3. Fasting\n4. Encouraging alcohol and smoking",
    "Why can NSAIDs (Non-Steroidal Anti-Inflammatory Drugs) cause gastric ulcers?\n1. Because they promote digestion\n2. Because they inhibit the production of prostaglandins that protect the gastric mucosa\n3. Because they enhance immune function\n4. Because they increase blood pressure",
    "What is the first thing to do when acute bleeding occurs in a gastric ulcer patient?\n1. Provide intravenous fluids\n2. Administer antibiotics\n3. Surgery\n4. Administer painkillers",
    "What lifestyle habit is recommended to prevent the recurrence of gastric ulcers after treatment?\n1. Regular exercise and stress management\n2. Eating spicy foods\n3. Skipping meals\n4. Keep smoking",
    "Which medication class is most effective in reducing gastric acid secretion for ulcer treatment?\n1. Antacids\n2. Proton Pump Inhibitors (PPIs)\n3. Antibiotics\n4. Analgesics",
    "What is the most common site for gastric ulcers?\n1. Fundus of the stomach\n2. Antrum of the stomach\n3. Cardia of the stomach\n4. Duodenum",
    "Which of the following is a typical finding in a patient with a bleeding gastric ulcer?\n1. Increased hemoglobin levels\n2. Melena (black, tarry stools)\n3. Jaundice\n4. Polyuria",
    "Which treatment approach is NOT commonly used for managing gastric ulcers?\n1. Antacids for immediate relief\n2. Lifestyle modifications\n3. Corticosteroids\n4. Eradication of H. pylori infection",
    "50-year-old male presents with burning epigastric pain that worsens with meals and is relieved by antacids. He has a history of smoking and occasional alcohol use. Physical examination reveals mild epigastric tenderness. What is the most likely diagnosis, and what diagnostic test should be performed next?. Most likely diagnosis:\n1. Gastritis\n2. Gastric ulcer\n3. GERD\n4. Pancreatitis",
    "50-year-old male presents with burning epigastric pain that worsens with meals and is relieved by antacids. He has a history of smoking and occasional alcohol use. Physical examination reveals mild epigastric tenderness. What is the most likely diagnosis, and what diagnostic test should be performed next?. Most likely Diagnostic test:\n1. Abdominal ultrasound\n2. Blood test\n3. Upper endoscopy\n4. Barium swallow",
    "A 45-year-old female reports a 3-month history of intermittent, dull, gnawing pain in the upper abdomen. She also mentions nausea and occasional vomiting. She has been taking NSAIDs for chronic back pain. Her stool test is positive for occult blood. What is the most likely cause of her symptoms, and what should be the initial management step?\n Most likely cause:\n1. Gallstones\n2. NSAID-induced gastric ulcer\n3. Irritable bowel syndrome\n4. Pancreatitis",
    "A 45-year-old female reports a 3-month history of intermittent, dull, gnawing pain in the upper abdomen. She also mentions nausea and occasional vomiting. She has been taking NSAIDs for chronic back pain. Her stool test is positive for occult blood. What is the most likely cause of her symptoms, and what should be the initial management step?\nInitial management step:\n1. Continue NSAIDs and add antacids\n2. Discontinue NSAIDs and start a Proton Pump Inhibitor (PPI)\n3. Perform surgery\n4. Start antibiotics",
    "A 60-year-old male presents with sudden onset of severe abdominal pain, rigidity, and a history of chronic gastric ulcer. He appears pale and diaphoretic. Vital signs indicate hypotension and tachycardia. What complication should be suspected, and what is the immediate management?\n Suspected complication:\n1. Gastric cancer\n2. Perforated gastric ulcer\n3. Gallbladder disease\n4. Pancreatitis\n",
    "A 60-year-old male presents with sudden onset of severe abdominal pain, rigidity, and a history of chronic gastric ulcer. He appears pale and diaphoretic. Vital signs indicate hypotension and tachycardia. What complication should be suspected, and what is the immediate management?\n Immediate management:\n1. Administer painkillers\n2. Emergency surgery and resuscitation with fluids\n3. Perform blood tests\n4. Start antibiotics",
    "A 55-year-old male with a history of gastric ulcers treated in the past presents with weight loss, anorexia, and persistent epigastric pain unresponsive to PPIs. An upper endoscopy reveals an ulcer with irregular, raised borders in the gastric antrum. Biopsy results are pending. What is the most concerning potential diagnosis, and what should be the next step in management?\n Most concerning potential diagnosis:\n1. Benign gastric ulcer\n2. Gastric cancer\n3. GERD\n4. Gastritis",
    "A 55-year-old male with a history of gastric ulcers treated in the past presents with weight loss, anorexia, and persistent epigastric pain unresponsive to PPIs. An upper endoscopy reveals an ulcer with irregular, raised borders in the gastric antrum. Biopsy results are pending. What is the most concerning potential diagnosis, and what should be the next step in management?\n Next step in management:\n1. Prescribe antibiotics\n2. Increase PPI dosage\n3. Await biopsy results and consider referral to oncology\n4. Start corticosteroids",
    "A 48-year-old female presents with epigastric pain and black, tarry stools for the past two days. She has a history of rheumatoid arthritis and has been on long-term NSAID therapy. On examination, she appears pale and has a heart rate of 110 bpm. Her hemoglobin level is 7 g/dL. What is the most likely diagnosis, and what is the immediate management step?\n Most likely diagnosis:\n1. GERD\n2. Peptic ulcer with bleeding\n3. Gastric cancer\n4. Pancreatitis",
    "A 48-year-old female presents with epigastric pain and black, tarry stools for the past two days. She has a history of rheumatoid arthritis and has been on long-term NSAID therapy. On examination, she appears pale and has a heart rate of 110 bpm. Her hemoglobin level is 7 g/dL. What is the most likely diagnosis, and what is the immediate management step?\n Immediate management step:\n1. Administer painkillers\n2. Immediate blood transfusion and endoscopic intervention\n3. Start a high-fiber diet\n4. Increase NSAID dosage"
]


In [128]:
# access_token = os.environ["HF_TOKEN"]  # token redacted: never commit access tokens; revoke the one that was stored here

In [129]:
# from transformers import AutoTokenizer
# tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B", token=access_token)
# text = context
# tokens = tokenizer.tokenize(text)
# num_tokens = len(tokens)
# print(f"Number of tokens in your text: {num_tokens}")

In [138]:
# Accumulator for the model answers, filled in the same order as `questions`.
answers = []

In [139]:
# Answer every evaluation question in order.
# The list is reset here so that re-running this cell does not append to the
# answers of a previous run (which would no longer line up with `questions`).
# Appending one by one keeps the partial results if a later call fails.
answers = []
for question in questions:
    result = combined_fusion_graph_rag(question)
    answers.append(result)



In [140]:
import pandas as pd

In [141]:
# Persist the question/answer pairs as an Excel workbook and as a CSV file.
output_excel_path = "output_fusgraph_options_excel.xlsx"
output_csv_path = "output_fusgraph_options_csv.csv"

data = dict(Question=questions, Answer=answers)
df = pd.DataFrame(data)

# Same table, two formats; the index column is left out of both files.
for writer, destination in (
    (df.to_excel, output_excel_path),
    (df.to_csv, output_csv_path),
):
    writer(destination, index=False)

print(f"Answers saved to {output_excel_path} and {output_csv_path}")

Answers saved to output_fusgraph_options_excel.xlsx and output_fusgraph_options_csv.csv


In [142]:
# Display the question/answer table that was just written to disk.
df

Unnamed: 0,Question,Answer
0,"The triple therapy consisting of PPI, clarithr...",The correct answer is:\n\n3. Urea breath test\...
1,A 40-year-old female patient presented with ep...,The correct answer is:\n\n4. Switching to a CO...
2,What is the most common cause of gastric ulcer...,The correct answer is:\n\n1. Helicobacter pylo...
3,Which of the following is NOT a major symptom ...,The correct answer is:\n\n3. Cough\n\nGastric ...
4,What is the most useful test for diagnosing ga...,The correct answer is:\n\n2. Gastroscopy\n\nGa...
5,Which non-invasive test is appropriate for dia...,The correct answer is:\n\n3. Urea breath test\...
6,Which of the following is NOT used in the trea...,The correct answer is:\n\n4. Insulin\n\nInsuli...
7,What is the recommended dietary habit for pati...,The correct answer is:\n\n2. Regular meals\n\n...
8,Why can NSAIDs (Non-Steroidal Anti-Inflammator...,The correct answer is:\n\n2. Because they inhi...
9,What is the first thing to do when acute bleed...,The correct answer is:\n\n1. Provide intraveno...
