In [1]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_ollama.llms import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA
from langchain import hub

from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.prompts import ChatMessagePromptTemplate, PromptTemplate
from langchain.schema.runnable import RunnablePassthrough

import pickle
import os

In [2]:

# Load the cleaned_documents list from the file
with open('final_pdf_pages.pkl', 'rb') as file:
    docs = pickle.load(file)

print("Documents loaded successfully!")


Documents loaded successfully!


In [3]:
len(docs)

220

In [4]:
text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    separators=['\n'],
    chunk_size=2024,
    chunk_overlap=204,
    length_function=len,
)

In [5]:
data = text_splitter.split_documents(docs)

In [6]:
len(data)

615

In [7]:
#loading the embedding model from huggingface
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cuda"}
embeddings = HuggingFaceEmbeddings(
  model_name=embedding_model_name,
  model_kwargs=model_kwargs
)

  embeddings = HuggingFaceEmbeddings(
  from tqdm.autonotebook import tqdm, trange


In [11]:
#loading the data and correspond embedding into the FAISS
vectorstore = FAISS.from_documents(docs, embeddings)

In [9]:
# vectorstore.save_local("faiss_index_")

In [12]:
# Load from local storage
persisted_vectorstore = FAISS.load_local("faiss_index_", embeddings,allow_dangerous_deserialization=True)

In [13]:
#creating a retriever on top of database
retriever = persisted_vectorstore.as_retriever()

In [17]:
template = """
        Task: You are an assistant that generates multiple variations of a given question. 
        For each variation, maintain the original intent of the question, but change the phrasing, structure, 
        or tone to create a diverse set of queries.

Generate 5-7 variations that cover:

Synonym replacements while keeping the question concise.
Alternative structures, such as rephrasing into "why," "how," or "what" forms if relevant.
Casual and formal tones.
Slightly more specific or broader wording.
Examples:

Original Question: "What is the impact of inflation on the stock market?"
Variations:
"How does inflation affect stock prices?"
"What are the effects of inflation on the stock market?"
"In what ways does inflation influence stock market trends?"
"Could inflation lead to changes in stock market values?"
"How does rising inflation impact the performance of stocks?"
"What influence does inflation have on market prices?"
"What happens to stock prices when inflation increases?"

the final answer should be a python list: [
    "What tests can be used to detect stomach ulcers?",
    "Why are endoscopies commonly used in gastric ulcer diagnosis?",
    "How do doctors figure out if someone has a stomach ulcer?",
    "Can an upper endoscopy be used to diagnose all types of gastric ulcers, or are there other methods more effective for certain cases?",
    "What tests and procedures are typically involved in diagnosing gastric ulcers, including acid reflux disease?",
    "How can a patient determine if they have been diagnosed with a stomach ulcer based on symptoms, test results, or imaging studies?",
    "When considering the diagnosis of a gastric ulcer, what role does symptom severity play in determining the need for further testing or treatment?"
]

Now generate the list of variations for the given question: {question}

        Answer:
        """

prompt = ChatPromptTemplate.from_template(template)

# model = OllamaLLM(model="llama3.2")

chain = prompt | model

print(chain.invoke({"question": "How is a gastric ulcer diagnosed?"}))

Here are 7 variations of the question "How is a gastric ulcer diagnosed?":

1. What diagnostic tests can help identify a stomach ulcer?
2. Why are endoscopies considered a key method in diagnosing gastric ulcers?
3. How do doctors typically diagnose gastric ulcers using medical imaging and other techniques?
4. Can an upper endoscopy be used to confirm the diagnosis of all types of gastric ulcers, or are there other methods that may be more effective for certain cases?
5. What is the role of symptom assessment in diagnosing gastric ulcers, and how do doctors determine if a patient's symptoms warrant further testing?
6. How can a patient determine if they have been diagnosed with a stomach ulcer based on their medical history, physical examination, and diagnostic test results?
7. In what ways does the severity of gastric ulcer symptoms impact the need for diagnosis and treatment?

These variations maintain the original intent of the question while using different phrasing, structures, an

In [18]:
fusion_template = """
        Task: You are an assistant that generates multiple variations of a given question. 
        For each variation, maintain the original intent of the question, but change the phrasing, structure, 
        or tone to create a diverse set of queries.

Generate 5-7 variations that cover:

Synonym replacements while keeping the question concise.
Alternative structures, such as rephrasing into "why," "how," or "what" forms if relevant.
Casual and formal tones.
Slightly more specific or broader wording.
Examples:

Original Question: "What is the impact of inflation on the stock market?"
Variations:
"How does inflation affect stock prices?"
"What are the effects of inflation on the stock market?"
"In what ways does inflation influence stock market trends?"
"Could inflation lead to changes in stock market values?"
"How does rising inflation impact the performance of stocks?"
"What influence does inflation have on market prices?"
"What happens to stock prices when inflation increases?"

the final answer should be a python list: [
    "What tests can be used to detect stomach ulcers?",
    "Why are endoscopies commonly used in gastric ulcer diagnosis?",
    "How do doctors figure out if someone has a stomach ulcer?",
    "Can an upper endoscopy be used to diagnose all types of gastric ulcers, or are there other methods more effective for certain cases?",
    "What tests and procedures are typically involved in diagnosing gastric ulcers, including acid reflux disease?",
    "How can a patient determine if they have been diagnosed with a stomach ulcer based on symptoms, test results, or imaging studies?",
    "When considering the diagnosis of a gastric ulcer, what role does symptom severity play in determining the need for further testing or treatment?"
]

Now generate the list of variations for the given question
        """

In [19]:
prompt = ChatPromptTemplate(input_variables=['original_query'],
                            messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[],template=fusion_template)),
                            HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['original_query'], template='Generate multiple search queries related to: {question} \n OUTPUT (7 queries):'))])
     

In [20]:
original_query = "How is a gastric ulcer diagnosed?"

In [21]:
generate_queries = (
    prompt | model | StrOutputParser() | (lambda x: x.split("\n"))
)

In [22]:
print(generate_queries)

first=ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='\n        Task: You are an assistant that generates multiple variations of a given question. \n        For each variation, maintain the original intent of the question, but change the phrasing, structure, \n        or tone to create a diverse set of queries.\n\nGenerate 5-7 variations that cover:\n\nSynonym replacements while keeping the question concise.\nAlternative structures, such as rephrasing into "why," "how," or "what" forms if relevant.\nCasual and formal tones.\nSlightly more specific or broader wording.\nExamples:\n\nOriginal Question: "What is the impact of inflation on the stock market?"\nVariations:\n"How does inflation affect stock prices?"\n"What are the effects of inflation on the stock market?"\n"In what ways does inflation influence stock market tren

In [23]:
from langchain.load import dumps, loads


def reciprocal_rank_fusion(results: list[list], k=60):
    fused_scores = {}
    for docs in results:
        # Assumes the docs are returned in sorted order of relevance
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            previous_score = fused_scores[doc_str]
            fused_scores[doc_str] += 1 / (rank + k)

    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]
    return reranked_results

In [24]:
ragfusion_chain = generate_queries | retriever.map() | reciprocal_rank_fusion

In [25]:
import langchain
langchain.debug = True

In [26]:

ragfusion_chain.input_schema.schema()

{'properties': {'question': {'title': 'Question', 'type': 'string'}},
 'required': ['question'],
 'title': 'PromptInput',
 'type': 'object'}

In [None]:
ragfusion_chain.invoke({"question": original_query})

In [28]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

full_rag_fusion_chain = (
    {
        "context": ragfusion_chain,
        "question": RunnablePassthrough()
    }
    | prompt
    | model
    | StrOutputParser()
)

In [29]:
full_rag_fusion_chain.input_schema.schema()

{'properties': {'question': {'title': 'Question', 'type': 'string'},
  'root': {'title': 'Root'}},
 'required': ['question', 'root'],
 'title': 'RunnableParallel<context,question>Input',
 'type': 'object'}

In [None]:
result = full_rag_fusion_chain.invoke({"question": "How long does it typically take for a gastric ulcer to heal?"})

In [31]:
print(result)

The healing time for a gastric ulcer can vary depending on the size and location of the ulcer, as well as the individual's overall health.

Generally, small gastric ulcers may heal within 2-4 weeks with medical treatment, such as antibiotics or acid-reducing medications. Larger ulcers may take longer to heal, typically taking 6-8 weeks to close completely.

Endoscopic therapy, such as using butyl-cyanoacrylate or band ligation, can also help speed up the healing process by promoting clotting and reducing bleeding.

It's essential to note that some gastric ulcers may not heal on their own and may require surgical intervention. In these cases, the healing time can be longer, typically taking 6-12 months or more to recover fully.

In the context of the provided text, there is no specific information about the typical healing time for gastric ulcers. However, it does mention that a randomized trial found BRTO (Balloon Retrograde Transvenous Obliteration) increased the likelihood of remaini

In [None]:
import pandas as pd

# List of 50 questions
questions = [
    "What is a gastric ulcer?",
    "What causes gastric ulcers?",
    "How does H. pylori infection contribute to gastric ulcers?",
    "What are the common symptoms of a gastric ulcer?",
    "How is a gastric ulcer diagnosed?",
    "What tests are used to detect H. pylori infection?",
    "What lifestyle changes can help prevent gastric ulcers?",
    "How does diet affect gastric ulcers?",
    "What are the common treatments for gastric ulcers?",
    "How effective are antibiotics in treating H. pylori-related ulcers?",
    "How long does it typically take for a gastric ulcer to heal?",
    "What are the potential side effects of ulcer medications?",
    "How often should I follow up with my doctor if I have a gastric ulcer?",
    "How do proton pump inhibitors (PPIs) work to treat gastric ulcers?",
    "Is surgery ever required to treat a gastric ulcer?",
    "What are the long-term effects of untreated gastric ulcers?",
    "What are the signs that a gastric ulcer is bleeding?",
    "How serious is a bleeding gastric ulcer?",
    "What should I do if I suspect my gastric ulcer is bleeding?",
    "How is a bleeding gastric ulcer treated in an emergency?",
    "What are the symptoms of a perforated gastric ulcer?",
    "How is a perforated gastric ulcer treated?",
    "How does anemia relate to gastric ulcers?",
    "How can I reduce my risk of developing a gastric ulcer?",
    "What are the alternatives to NSAIDs if I have a history of gastric ulcers?",
    "How often should I get screened for H. pylori if I've had a gastric ulcer before?",
    "How does gastric acid secretion influence the formation of stomach ulcers?",
    "How is the urea breath test used to diagnose H. pylori infection?",
    "How do NSAIDs induce gastric mucosal injury leading to ulcers?",
    "What is the role of endoscopy in the management of peptic ulcer disease?",
    "How do you assess the severity of bleeding in a peptic ulcer patient?",
    "What are the indications for surgical intervention in peptic ulcer disease?",
    "How does H. pylori eradication therapy affect the recurrence of stomach ulcers?",
    "How are refractory peptic ulcers managed clinically?",
    "What are the long-term outcomes of patients with peptic ulcer bleeding?",
    "How does chronic use of corticosteroids influence peptic ulcer formation?",
    "How do you differentiate between benign and malignant gastric ulcers during endoscopy?",
    "How do you manage patients with non-healing gastric ulcers despite standard treatment?",
    "What are the indications for endoscopic biopsy in patients with suspected gastric ulcers?",
    "How do you approach the treatment of gastric ulcers in patients with concurrent liver disease?",
    "How do you tailor the treatment of peptic ulcers in elderly patients with multiple comorbidities?",
    "What is your approach to managing patients with peptic ulcers who are on anticoagulant therapy?",
    "What are the current guidelines for the use of proton pump inhibitors (PPIs) in peptic ulcer disease?",
    "How do you handle cases of refractory H. pylori infection that do not respond to standard eradication regimens?",
    "Where does acute pain from a gastric ulcer typically manifest?",
    "What are the differences between gastritis, gastric erosion, and gastric ulcers?",
    "Can black stool indicate a gastric ulcer?",
    "What is the Sakita classification in gastric ulcers?",
    "Could vomiting blood indicate a gastric ulcer?",
    "Are there any significant blood test results when a patient has a gastric ulcer?"
]

# Initialize an empty list to store results
results = []

# Loop through each question, invoke the RAG pipeline, and store the results
for question in questions:
    result = full_rag_fusion_chain.invoke({"question": question})
    results.append({"question": question, "answer": result})

# Convert results to a DataFrame
df = pd.DataFrame(results)

# Save the DataFrame to a CSV file
df.to_csv("rag_pipeline_results.csv", index=False)
df.to_excel("rag_pipeline_results.xlsx", index=False)

print("Results saved to 'rag_pipeline_results.csv'")


[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m{
  "question": "What is a gastric ulcer?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,question>] Entering Chain run with input:
[0m{
  "question": "What is a gastric ulcer?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,question> > chain:RunnableSequence] Entering Chain run with input:
[0m{
  "question": "What is a gastric ulcer?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,question> > chain:RunnableSequence > prompt:ChatPromptTemplate] Entering Prompt run with input:
[0m{
  "question": "What is a gastric ulcer?"
}
[36;1m[1;3m[chain/end][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,question> > chain:RunnableSequence > prompt:ChatPromptTemplate] [0ms] Exiting Prompt run with output:
[0m[outputs]
[32;1m[1;3m[llm/start][