In [1]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_ollama.llms import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA
from langchain import hub

from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.prompts import ChatMessagePromptTemplate, PromptTemplate
from langchain.schema.runnable import RunnablePassthrough

from langchain_openai import ChatOpenAI

import pickle
import os

In [2]:
# OpenAI API key setup
# SECURITY: an API key used to be hard-coded in this cell. Treat that key as
# leaked and revoke it. Supply the replacement through the OPENAI_API_KEY
# environment variable (set it before starting Jupyter) so the secret never
# ends up in the notebook file or its version history.
OPENAI_API_TOKEN = os.environ.get("OPENAI_API_KEY", "").strip()
if not OPENAI_API_TOKEN:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment that "
        "launches this notebook instead of pasting the key into a cell."
    )

# Write the whitespace-trimmed value back to the environment.
os.environ["OPENAI_API_KEY"] = OPENAI_API_TOKEN

In [30]:

# Restore the previously pickled documents from disk into `docs`.
# NOTE(review): pickle.load can execute arbitrary code while deserializing —
# only point this at a file you produced yourself.
with open('final_pdf_pages.pkl', 'rb') as pickled_pages:
    docs = pickle.load(pickled_pages)

print("Documents loaded successfully!")


Documents loaded successfully!


In [31]:
# Sanity check: number of documents loaded from the pickle file.
len(docs)

220

In [32]:
# Chunking configuration: newline is the only separator, the target chunk size
# is 2024 with an overlap of 204 between neighbouring chunks, and lengths are
# measured with the built-in len.
splitter_options = dict(
    separators=['\n'],
    chunk_size=2024,
    chunk_overlap=204,
    length_function=len,
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

In [33]:
# Split the loaded documents into chunks with the splitter configured above.
data = text_splitter.split_documents(docs)

In [34]:
# Sanity check: number of chunks produced by the splitter.
len(data)

615

In [3]:
# Load the sentence-embedding model from Hugging Face.
import torch  # used only to probe for a GPU

embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
# Use the GPU when torch can see one, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA (the device used to be
# hard-coded to "cuda").
model_kwargs = {"device": "cuda" if torch.cuda.is_available() else "cpu"}
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs=model_kwargs,
)

  embeddings = HuggingFaceEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


In [50]:
#loading the data and correspond embedding into the FAISS
# vectorstore = FAISS.from_documents(data, embeddings)

In [51]:
# vectorstore.save_local("updated_faiss_index_")

In [4]:
# Load the FAISS index from local storage. If it has not been built yet,
# build it from the document chunks and save it, so the notebook runs
# end-to-end on a fresh checkout without un-commenting any cells.
FAISS_INDEX_DIR = "updated_faiss_index_"

if os.path.isdir(FAISS_INDEX_DIR):
    # NOTE(security): allow_dangerous_deserialization opts in to loading
    # pickled data from the index directory; only load an index that you
    # created yourself.
    vectorstore = FAISS.load_local(
        FAISS_INDEX_DIR,
        embeddings,
        allow_dangerous_deserialization=True,
    )
else:
    # Embed the chunked documents (`data`) and cache the index for later runs.
    vectorstore = FAISS.from_documents(data, embeddings)
    vectorstore.save_local(FAISS_INDEX_DIR)

In [52]:
# Retriever interface over the vector store, created with default settings.
# NOTE(review): the cells visible below query `vectorstore` directly and never
# use `retriever` — confirm whether it is still needed.
retriever = vectorstore.as_retriever()

In [22]:
# Prompt for the RAG chain. It has two placeholders, {question} and {context},
# that are filled in when the prompt is invoked.
# NOTE(review): the wording contains typos ("in detailed", "Dont"); it is left
# untouched because editing the prompt text can change the model's answers.
template = """ Answer the questions based only on the following context in detailed. You have the freedom to interpret the context however you should not respond for anything which is not in context. Dont mention about the context in your response.
        Given below is the context and question of the user.
        Question = {question}
        Context = {context}
        Answer:
         """
# Wrap the template as a chat prompt.
prompt = ChatPromptTemplate.from_template(template)

In [23]:
# Smoke test: render the prompt with dummy values to inspect the final message.
prompt.invoke({"question": 'hello', "context": 'conetex'})

ChatPromptValue(messages=[HumanMessage(content=' Answer the questions based only on the following context in detailed. You have the freedom to interpret the context however you should not respond for anything which is not in context. Dont mention about the context in your response.\n        Given below is the context and question of the user.\n        Question = hello\n        Context = conetex\n        Answer:\n         ', additional_kwargs={}, response_metadata={})])

In [6]:
# Chat model used for answer generation; temperature=0 requests the least
# random sampling.
llm = ChatOpenAI(model="gpt-4o", temperature=0)

In [27]:
# Evaluation questions on gastric / peptic ulcers. Each one is sent through the
# RAG graph further down, and the answers are exported next to the questions.
questions = [
    "What is a gastric ulcer?",
    "What causes gastric ulcers?",
    "How does H. pylori infection contribute to gastric ulcers?",
    "What are the common symptoms of a gastric ulcer?",
    "How is a gastric ulcer diagnosed?",
    "What tests are used to detect H. pylori infection?",
    "What are the common treatments for gastric ulcers?",
    "What are the potential side effects of ulcer medications?",
    "How do proton pump inhibitors (PPIs) work to treat gastric ulcers?",
    "What are the long-term effects of untreated gastric ulcers?",
    "What are the signs that a gastric ulcer is bleeding?",
    "What should I do if I suspect my gastric ulcer is bleeding?",
    "How is a bleeding gastric ulcer treated in an emergency?",
    "What are the symptoms of a perforated gastric ulcer?",
    "How is a perforated gastric ulcer treated?",
    "How does anemia relate to gastric ulcers?",
    "What are the alternatives to NSAIDs if I have a history of gastric ulcers?",
    "How does gastric acid secretion influence the formation of stomach ulcers?",
    "How is the urea breath test used to diagnose H. pylori infection?",
    "How do NSAIDs induce gastric mucosal injury leading to ulcers?",
    "What is the role of endoscopy in the management of peptic ulcer disease?",
    "How does chronic use of corticosteroids influence peptic ulcer formation?",
    "How do you differentiate between benign and malignant gastric ulcers during endoscopy?",
    "What are the indications for endoscopic biopsy in patients with suspected gastric ulcers?",
    "What are the differences between gastritis, gastric erosion, and gastric ulcers?"
]


In [28]:
# Container for the generated answers (one per entry in `questions`);
# it is populated by the batch cell further down.
answers = []

In [29]:
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
from langchain_core.documents import Document

In [30]:
# Define state for application
class State(TypedDict):
    """State dictionary passed between the graph steps."""

    # The question being answered (supplied when the graph is invoked).
    question: str
    # Documents returned by the `retrieve` step.
    context: List[Document]
    # Answer text produced by the `generate` step.
    answer: str


# Define application steps
def retrieve(state: State):
    """Look up the stored documents most similar to the question.

    Args:
        state: Graph state; only ``state["question"]`` is read.

    Returns:
        A partial state update ``{"context": [...]}`` holding whatever
        ``vectorstore.similarity_search`` returns for the question.
    """
    query = state["question"]
    return {"context": vectorstore.similarity_search(query)}


def generate(state: State):
    """Answer the question using the retrieved documents as context.

    Args:
        state: Graph state; reads ``state["question"]`` and ``state["context"]``.

    Returns:
        A partial state update ``{"answer": <text>}`` carrying the ``content``
        of the LLM response.
    """
    # Concatenate the page contents, separated by blank lines.
    context_text = "\n\n".join([doc.page_content for doc in state["context"]])
    prompt_value = prompt.invoke(
        {"question": state["question"], "context": context_text}
    )
    return {"answer": llm.invoke(prompt_value).content}

In [31]:
# Wire the two steps into a linear pipeline (START -> retrieve -> generate)
# and compile it into a runnable graph.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [32]:
# Smoke test: run a single question through the compiled graph and print the
# generated answer.
response = graph.invoke({"question": "What are the symptoms of a perforated gastric ulcer?"})
print(response["answer"])

The symptoms of a perforated gastric ulcer include acute abdominal pain, tachycardia, and abdominal rigidity, which together form the classic triad associated with this complication. However, it is important to note that elderly patients or individuals who are immunosuppressed may not exhibit this classic presentation. Additionally, severe abdominal pain, shock, abdominal board-like rigidity, and signs of peritoneal irritation such as rebound tenderness are indicative of acute perforation. The presence of free intraperitoneal air is also a symptom of a perforated gastric ulcer.


In [33]:
import pandas as pd

In [34]:
# Run every question through the RAG graph and collect the answers in order.
# `answers` is reset here (rather than only in an earlier cell) so that
# re-running this cell cannot leave it longer than `questions`, which would
# break the DataFrame construction in the export cell.
answers = []
for question in questions:
    result = graph.invoke({"question": question})
    answers.append(result["answer"])

In [35]:
# Save to Excel and CSV
# The table is built under its own name: reusing `data` here would overwrite
# the list of document chunks produced by the text splitter earlier on.
if len(questions) != len(answers):
    raise ValueError(
        f"Expected one answer per question, got {len(answers)} answers for "
        f"{len(questions)} questions; re-run the answer-generation cell."
    )
qa_columns = {"Question": questions, "Answer": answers}
df = pd.DataFrame(qa_columns)

output_excel_path = "output_gpt_rag_excel.xlsx"
output_csv_path = "output_gpt_rag_csv.csv"

# index=False keeps the DataFrame's row index out of both files.
df.to_excel(output_excel_path, index=False)
df.to_csv(output_csv_path, index=False)

print(f"Answers saved to {output_excel_path} and {output_csv_path}")

Answers saved to output_gpt_rag_excel.xlsx and output_gpt_rag_csv.csv


In [36]:
# Display the question/answer table.
df

Unnamed: 0,Question,Answer
0,What is a gastric ulcer?,A gastric ulcer is a type of peptic ulcer that...
1,What causes gastric ulcers?,Gastric ulcers can be caused by several factor...
2,How does H. pylori infection contribute to gas...,H. pylori infection contributes to gastric ulc...
3,What are the common symptoms of a gastric ulcer?,Common symptoms of a gastric ulcer include pos...
4,How is a gastric ulcer diagnosed?,A gastric ulcer is diagnosed through a combina...
5,What tests are used to detect H. pylori infect...,Tests used to detect H. pylori infection inclu...
6,What are the common treatments for gastric ulc...,Common treatments for gastric ulcers include t...
7,What are the potential side effects of ulcer m...,The potential side effects of ulcer medication...
8,How do proton pump inhibitors (PPIs) work to t...,Proton pump inhibitors (PPIs) are widely used ...
9,What are the long-term effects of untreated ga...,Untreated gastric ulcers can lead to several l...


In [160]:
# Normalise the 'Answer' column to plain answer text.
# Entries that are dicts are reduced to their 'answer' value (None when the
# key is missing). Anything else is kept as is — in particular answers that
# are already strings, as produced by the batch loop. The previous version
# replaced every non-dict entry with None, which blanked the whole column
# right before the final export.
df['Answer'] = df['Answer'].apply(
    lambda value: value.get('answer') if isinstance(value, dict) else value
)

# Display the updated DataFrame
df

Unnamed: 0,Question,Answer
0,What is a gastric ulcer?,A gastric ulcer is a peptic ulcer that is defi...
1,What causes gastric ulcers?,Most gastric ulcers are directly caused by inf...
2,How does H. pylori infection contribute to gas...,H. pylori infection contributes to gastric ulc...
3,What are the common symptoms of a gastric ulcer?,The common symptoms of a gastric ulcer are pos...
4,How is a gastric ulcer diagnosed?,A gastric ulcer is diagnosed through two compl...
5,What tests are used to detect H. pylori infect...,Tests used to detect H. pylori infection inclu...
6,What are the common treatments for gastric ulc...,"The common treatments for gastric ulcers, as m..."
7,What are the potential side effects of ulcer m...,Adverse effects of ulcer medications with shor...
8,How do proton pump inhibitors (PPIs) work to t...,Proton pump inhibitors (PPIs) are used widely ...
9,What are the long-term effects of untreated ga...,The long-term effects of untreated gastric ulc...


In [161]:
# Re-export the table after the 'Answer' column clean-up. The paths are the
# same as in the earlier export cell, so those files are overwritten.
output_excel_path, output_csv_path = (
    "output_gpt_rag_excel.xlsx",
    "output_gpt_rag_csv.csv",
)

# index=False keeps the DataFrame's row index out of both files.
df.to_excel(output_excel_path, index=False)
df.to_csv(output_csv_path, index=False)

print(f"Answers saved to {output_excel_path} and {output_csv_path}")

Answers saved to output_gpt_rag_excel.xlsx and output_gpt_rag_csv.csv
