In [26]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_ollama.llms import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.pydantic_v1 import BaseModel, Field
import pickle
import os
from neo4j import GraphDatabase
from langchain_community.vectorstores import Neo4jVector

from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.prompts import ChatMessagePromptTemplate, PromptTemplate

from typing import Tuple, List, Optional

from langchain_ollama import ChatOllama

In [2]:
# Neo4j connection settings. They are read from the environment so that the
# password is never stored in the notebook: export NEO4J_PASSWORD (and,
# optionally, NEO4J_URI / NEO4J_USERNAME) before starting the kernel.
# NOTE(review): the password that used to be hardcoded in this cell has been
# exposed wherever this notebook was shared and should be rotated.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j+s://9269fc71.databases.neo4j.io")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
# Empty string when the variable is unset, so no baked-in secret is ever used.
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "")

In [3]:
# Publish the Neo4j connection settings as process environment variables.
os.environ.update(
    {
        "NEO4J_URI": NEO4J_URI,
        "NEO4J_USERNAME": NEO4J_USERNAME,
        "NEO4J_PASSWORD": NEO4J_PASSWORD,
    }
)

In [4]:
# Neo4j handle used by structured_retriever for the full-text graph queries.
# NOTE(review): no arguments are passed; presumably Neo4jGraph picks up the
# NEO4J_* environment variables set earlier in the notebook — confirm.
graph = Neo4jGraph()

In [5]:
# Second Neo4j handle, built with the connection settings passed explicitly
# from the environment variables.
graph_documents = Neo4jGraph(
    url=os.environ["NEO4J_URI"],
    username=os.environ["NEO4J_USERNAME"],
    password=os.environ["NEO4J_PASSWORD"],
)

In [6]:
# Local import: this cell runs before the notebook's later `import torch` cell.
import torch

# Sentence-embedding model used for the Neo4j vector index.
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# machine without CUDA instead of requiring a "cuda" device unconditionally.
model_kwargs = {"device": "cuda" if torch.cuda.is_available() else "cpu"}
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name, model_kwargs=model_kwargs)

  embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name, model_kwargs=model_kwargs)
  from tqdm.autonotebook import tqdm, trange


In [9]:
# Vector index over the existing `Document` nodes: the `text` property is the
# content that gets embedded, the vector is kept in the `embedding` property,
# and searches use the "hybrid" search type.
vector_index = Neo4jVector.from_existing_graph(
    embeddings,
    node_label="Document",
    embedding_node_property="embedding",
    text_node_properties=["text"],
    search_type="hybrid",
)

In [27]:
# Chat model shared by the chains defined later in this notebook
# (query variation, entity extraction and final answer); temperature is set to 0.
llm = ChatOllama(model='llama3.2', temperature=0)

In [10]:
# llm = OllamaLLM(model="llama3.2")

In [79]:
# System prompt for the query-variation step: it instructs the model to rewrite
# a question into several differently phrased variations, one per line.
# NOTE(review): this text asks for 5-7 variations, while the human message that
# is combined with it in the next cell asks for 5 queries — confirm which count
# is intended.
fusion_template = """
        Task: You are an assistant that generates multiple variations of a given question. 
        For each variation, maintain the original intent of the question, but change the phrasing, structure, 
        or tone to create a diverse set of queries.

Generate 5-7 variations that cover:

Synonym replacements while keeping the question concise.
Alternative structures, such as rephrasing into "why," "how," or "what" forms if relevant.
Casual and formal tones.
Slightly more specific or broader wording.
Examples:

Original Question: "How is a gastric ulcer diagnosed?"
Variations:
    "What tests can be used to detect stomach ulcers?"
    "Why are endoscopies commonly used in gastric ulcer diagnosis?"
    "How do doctors figure out if someone has a stomach ulcer?"
    "Can an upper endoscopy be used to diagnose all types of gastric ulcers, or are there other methods more effective for certain cases?"
    "What tests and procedures are typically involved in diagnosing gastric ulcers, including acid reflux disease?"
    "How can a patient determine if they have been diagnosed with a stomach ulcer based on symptoms, test results, or imaging studies?"
    "When considering the diagnosis of a gastric ulcer, what role does symptom severity play in determining the need for further testing or treatment?"

Now generate just the list of variations one on each line for the given question and nothing else
        """

In [80]:
# Prompt for the query-variation ("fusion") step: the fixed system instructions
# followed by a human message that carries the user's question.
# The only template variable is {question}. It is derived from the template text
# itself rather than from a hand-written input_variables list, which had drifted
# to 'original_query' and no longer matched the placeholder.
prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template(fusion_template),
        HumanMessagePromptTemplate.from_template(
            'Generate multiple search queries related to: {question} \n OUTPUT (5 queries):'
        ),
    ]
)
     

In [81]:
# Sample question kept for ad-hoc experiments.
# NOTE(review): no other cell visible in this notebook reads this variable.
original_query = 'How do NSAIDs induce gastric mucosal injury leading to ulcers?'

In [82]:
def _split_nonblank_lines(text):
    """Turn the model's raw text into a list of queries, one per non-blank line.

    Leading/trailing whitespace is stripped from every line, and blank or
    whitespace-only lines are dropped so that they are never used as search
    queries downstream.

    Args:
        text: Raw string produced by the model.

    Returns:
        List of non-empty, stripped lines in their original order.
    """
    return [line.strip() for line in text.splitlines() if line.strip()]


# Query-variation chain: prompt -> chat model -> plain string -> list of queries.
generate_queries = prompt | llm | StrOutputParser() | _split_nonblank_lines

In [84]:
# Try the query-variation chain on a sample question; the cell output is the
# resulting list of queries.
generate_queries.invoke('What causes gastric ulcers?')

['What triggers stomach ulcers?',
 'How do gastric ulcers form in the body?',
 'What are the common causes of stomach ulcers?',
 'Can stress contribute to the development of gastric ulcers?',
 'What medical conditions increase the risk of developing a gastric ulcer?']

In [32]:
# Define the schema without validators to avoid deepcopy issues
class Entities(BaseModel):
    """Identifying information about entities."""

    # Names of the entities found in the text. The description is written as
    # adjacent string literals, so each piece must carry its own separating
    # space (the last piece previously ran straight into "Imaging Type").
    nodes: List[str] = Field(
        ...,
        description="All the Disease, Cause, Symptom, Complication, Treatment, Medication, Test, Risk Factor, Diagnosis, Side Effect, Procedure, Condition Stage, Disease Type, Imaging Type"
                    " that appear in the text"
    )

In [33]:
# Entity-extraction prompt: system instructions with one worked example,
# followed by a human message that carries the question to analyse.
# NOTE(review): this rebinds the notebook-level name `prompt`, which an earlier
# cell also assigns, so cells that read `prompt` depend on execution order.
# The typos in the instruction text are part of the prompt sent to the model and
# are intentionally left untouched here.
prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template(
            "You are tasked with extracting the diseases, conditions, Diagnosis or symptoms from the qiven question "
            " for example, for the question: How is a gastric ulcer diagnosed?, you are going to find that gastric ulcer is a dieases and you also found that diagnosis is used in the question, so you are going to return gastric ulcer"
        ),
        HumanMessagePromptTemplate.from_template(
            "Use the specified format to extract information from the following input: {question}."
        ),
    ]
)

In [34]:
# Entity-extraction chain: fills the prompt bound to `prompt` at this point and
# asks the chat model for output structured according to the Entities schema.
entity_chain = prompt | llm.with_structured_output(Entities)

In [35]:
# Check entity extraction on a sample question; the cell output is the
# resulting Entities object.
entity_chain.invoke({"question": "How do NSAIDs induce gastric mucosal injury leading to ulcers?"})

Entities(nodes=['Gastric ulcer', 'NSAID-induced gastric mucosal injury'])

In [36]:
def generate_full_text_query(input: str) -> str:
    """Build a fuzzy full-text search query from free text.

    The text is cleaned with ``remove_lucene_chars`` and split on whitespace;
    every remaining word gets a ``~2`` suffix and the words are combined
    with ``AND``.

    Args:
        input: Free text to search for, e.g. an entity name.

    Returns:
        The query string, e.g. ``"gastric~2 AND ulcer~2"``. An empty string is
        returned when no searchable word remains (empty input, or input made
        only of characters removed by ``remove_lucene_chars``) instead of
        failing with an IndexError.
    """
    # str.split() with no argument never yields empty strings, so no extra
    # filtering is needed; joining an empty list naturally gives "".
    words = remove_lucene_chars(input).split()
    return " AND ".join(f"{word}~2" for word in words)

In [50]:
# def structured_retriever(question: str) -> str:
#     result = ""
#     # Extract entities
#     entities = entity_chain.invoke({"question": question})
#     if not entities.nodes:
#         raise ValueError(f"No entities extracted from the question: {question}")
    
#     # Query each entity
#     for entity in entities.nodes:
#         query = generate_full_text_query(entity)
#         if not query:
#             raise ValueError(f"Generated an empty full-text query for entity: {entity}")
        
#         # Execute the full-text query
#         response = graph.query(
#             """
#             CALL db.index.fulltext.queryNodes('entityIndex', $query)
#             YIELD node, score
#             RETURN node.name AS name, score
#             """,
#             {"query": query}
#         )
        
#         # Handle no results
#         if not response:
#             result += f"No results found for entity: {entity}\n"
#         else:
#             result += "\n".join([f"Entity: {record['name']}, Score: {record['score']}" for record in response])
#     return result


In [52]:
# Fulltext index query
def structured_retriever(question: str) -> str:
    """Collect graph relationships around the entities mentioned in a question.

    Entities are extracted from the question with ``entity_chain``. For each
    entity, the ``entity`` full-text index is queried and every non-MENTIONS
    relationship of the matching nodes (both directions) is rendered as
    ``"<source id> - <TYPE> -> <target id>"``.

    Args:
        question: The user question.

    Returns:
        All relationship lines, one per line, in the order the entities were
        returned; an empty string if nothing was found.
    """
    entities = entity_chain.invoke({"question": question})
    lines = []
    for entity in entities.nodes:
        full_text_query = generate_full_text_query(entity)
        if not full_text_query:
            # Nothing searchable in this entity name; do not send an empty
            # query to the full-text index.
            continue
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entity', $query)
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output
            """,
            {"query": full_text_query},
        )
        lines.extend(el['output'] for el in response)
    # Join once at the end so that the last line of one entity and the first
    # line of the next are separated by a newline as well.
    return "\n".join(lines)

In [72]:
# Show the relationship lines retrieved from the graph for a sample question.
print(structured_retriever("What causes gastric ulcers?"))



Gastric ulcer - ASSOCIATED_WITH -> Bleeding
Gastric ulcer - ASSOCIATED_WITH -> Peptic ulcer disease
Gastric ulcer - ASSOCIATED_WITH -> Mass
Gastric ulcer - HAS_SYMPTOM -> Vomiting
Gastric ulcer - HAS_SYMPTOM -> Nausea
Gastric ulcer - HAS_SYMPTOM -> Pain soon after meals
Gastric ulcer - HAS_SYMPTOM -> Weight loss
Gastric ulcer - HAS_SYMPTOM -> Anorexia
H2RAs - TREATED_WITH -> Gastric ulcer
PPIs - TREATED_WITH -> Gastric ulcer
Sucralfate - TREATED_WITH -> Gastric ulcer
Misoprostol - TREATED_WITH -> Gastric ulcer
Proton pump inhibitors - TREATED_WITH -> Gastric ulcer
P-CAB - TREATED_WITH -> Gastric ulcer
Pirenzepine - TREATED_WITH -> Gastric ulcer
proton pump inhibitors - TREATED_WITH -> gastric ulcer
Gastric Ulcer - HAS_SYMPTOM -> ulcer stenosis; pyloric stenosis; oesophageal gastric varices; Barrett's oesophagus measuring >3 cm; intractable ulcer; digestive ulcer perforation or malignancy on upper GI endoscopy
Gastric Ulcer - HAS_SYMPTOM -> GI symptoms
Gastric Ulcer - COMPLICATES -> adv

In [55]:
def combined_retriever(queries):
    """Run graph retrieval and vector retrieval for every query.

    Args:
        queries: Iterable of query strings.

    Returns:
        A list with one dict per query, in input order:
        ``"structured"`` holds the text from ``structured_retriever`` and
        ``"unstructured"`` holds the page contents of the vector-search hits
        joined with ``"#Document "``.
    """

    def _retrieve_one(single_query):
        # Graph lookup first, then the vector search, as two separate sources.
        graph_text = structured_retriever(single_query)
        passages = "#Document ".join(
            hit.page_content for hit in vector_index.similarity_search(single_query)
        )
        return {"structured": graph_text, "unstructured": passages}

    return [_retrieve_one(single_query) for single_query in queries]

In [86]:
def _format_retrieved_context(retrieved_data):
    """Render the per-query retrieval results as a single context string."""
    sections = []
    for result in retrieved_data:
        unstructured = result["unstructured"]
        if not isinstance(unstructured, str):
            # Accept a list of passages too; a string must NOT be passed to
            # str.join, which would put a space between every character.
            unstructured = " ".join(unstructured)
        sections.append(
            f"Structured:\n{result['structured']}\nUnstructured:\n{unstructured}"
        )
    return "\n".join(sections)


# Combining results and generating the final response
def combined_fusion_graph_rag(original_query):
    """Answer a question using query variations plus graph and vector retrieval.

    Steps:
        1. Generate variations of the question with ``generate_queries``.
        2. Retrieve structured and unstructured data for each variation with
           ``combined_retriever``.
        3. Concatenate everything into one context string.
        4. Ask the chat model to answer the original question from that context.

    Args:
        original_query: The user question.

    Returns:
        The output of the answer chain (prompt -> llm -> StrOutputParser).
    """
    # Step 1: Generate query variations
    queries = generate_queries.invoke(original_query)

    # Step 2: Retrieve data for each query
    retrieved_data = combined_retriever(queries)

    # Step 3: Combine retrieved results
    combined_context = _format_retrieved_context(retrieved_data)

    # Step 4: Generate final response
    answer_template = """Answer the question based only on the following context:
    {context}
    
    Question: {question}
    """
    final_prompt = ChatPromptTemplate.from_template(answer_template)
    chain = final_prompt | llm | StrOutputParser()
    return chain.invoke({"context": combined_context, "question": original_query})

In [87]:
# Run the full pipeline end to end on a sample question and show the answer.
final_result = combined_fusion_graph_rag("What causes gastric ulcers?")
print(final_result)



Gastric ulcers can be caused by several factors, including:

1. Helicobacter pylori (H. pylori) infection
2. Nonsteroidal anti-inflammatory drugs (NSAIDs)
3. Stress
4. Radiation therapy
5. Chemotherapy
6. Inflammatory bowel disease (IBD), such as Crohn's disease and ulcerative colitis
7. Zollinger-Ellison syndrome, a rare condition characterized by excessive production of gastric acid
8. Gastroesophageal reflux disease (GERD)
9. Smoking
10. Certain medications, such as corticosteroids and certain antibiotics

It's worth noting that the exact cause of gastric ulcers can vary depending on the individual and the specific type of ulcer.

Here are some possible causes of gastric ulcers based on the provided text:

* Helicobacter pylori infection (CT = computed tomography; NSAID = nonsteroidal anti-inflammatory drug)
* Stress
* Radiation therapy
* Chemotherapy

It's also worth noting that certain conditions, such as inflammatory bowel disease and Zollinger-Ellison syndrome, can increase the 

In [98]:
# Evaluation set: the 25 questions that are answered in batch further down and
# saved next to their answers.
questions = [
    "What is a gastric ulcer?",
    "What causes gastric ulcers?",
    "How does H. pylori infection contribute to gastric ulcers?",
    "What are the common symptoms of a gastric ulcer?",
    "How is a gastric ulcer diagnosed?",
    "What tests are used to detect H. pylori infection?",
    "What are the common treatments for gastric ulcers?",
    "What are the potential side effects of ulcer medications?",
    "How do proton pump inhibitors (PPIs) work to treat gastric ulcers?",
    "What are the long-term effects of untreated gastric ulcers?",
    "What are the signs that a gastric ulcer is bleeding?",
    "What should I do if I suspect my gastric ulcer is bleeding?",
    "How is a bleeding gastric ulcer treated in an emergency?",
    "What are the symptoms of a perforated gastric ulcer?",
    "How is a perforated gastric ulcer treated?",
    "How does anemia relate to gastric ulcers?",
    "What are the alternatives to NSAIDs if I have a history of gastric ulcers?",
    "How does gastric acid secretion influence the formation of stomach ulcers?",
    "How is the urea breath test used to diagnose H. pylori infection?",
    "How do NSAIDs induce gastric mucosal injury leading to ulcers?",
    "What is the role of endoscopy in the management of peptic ulcer disease?",
    "How does chronic use of corticosteroids influence peptic ulcer formation?",
    "How do you differentiate between benign and malignant gastric ulcers during endoscopy?",
    "What are the indications for endoscopic biopsy in patients with suspected gastric ulcers?",
    "What are the differences between gastritis, gastric erosion, and gastric ulcers?"
]

In [94]:
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import multiprocessing
import torch

In [95]:
# CUDA device count reported by torch; later cells use it to size the thread
# pool and to compute the batch size.
# NOTE(review): presumably 0 on a machine without a GPU, which the cells that
# multiply or divide by it must tolerate — confirm.
num_gpus = torch.cuda.device_count()

In [96]:
def answer_question(question):
    """Answer one question with combined_fusion_graph_rag without ever raising.

    Args:
        question: The question to answer.

    Returns:
        The pipeline's answer, or the string ``"Error: <message>"`` when the
        pipeline raised an exception (best effort, so one failing question does
        not abort a whole batch).
    """
    try:
        answer = combined_fusion_graph_rag(question)
    except Exception as exc:
        answer = "Error: " + str(exc)
    return answer

In [97]:
def process_questions_in_batches(questions, batch_size):
    """Answer all questions concurrently with a thread pool.

    Each question is passed to ``answer_question`` on a worker thread; no
    explicit GPU placement happens here. The pool has
    ``num_gpus * batch_size`` workers, but never fewer than one.

    Args:
        questions: Iterable of question strings.
        batch_size: Multiplier applied to ``num_gpus`` to size the pool.

    Returns:
        List of the ``answer_question`` results, in the same order as
        ``questions``.
    """
    # num_gpus can be 0 and ThreadPoolExecutor rejects max_workers <= 0, so
    # always keep at least one worker.
    worker_count = max(1, num_gpus * batch_size)
    with ThreadPoolExecutor(max_workers=worker_count) as executor:
        return list(executor.map(answer_question, questions))

In [None]:
# Derive the batch size from the number of questions and the CPU/GPU counts.
# The divisor is clamped to at least 1 so that a machine without a GPU
# (num_gpus == 0) does not raise ZeroDivisionError.
batch_size = max(1, len(questions) // max(1, multiprocessing.cpu_count() * num_gpus))
answers = process_questions_in_batches(questions, batch_size)

# Save to Excel and CSV
data = {"Question": questions, "Answer": answers}
df = pd.DataFrame(data)
output_path = "gastric_ulcer_answers.xlsx"
df.to_excel(output_path, index=False)
df.to_csv('fus_graph_output.csv', index=False)

print(f"Answers saved to {output_path}")

In [None]:
# Display the question/answer table.
df

In [71]:
# Write the question/answer table to the Excel file.
# NOTE(review): this repeats the Excel export already done in the batch cell
# above with the same path — confirm whether this cell is still needed.
output_path = "gastric_ulcer_answers.xlsx"
df.to_excel(output_path, index=False)