In [1]:
pip install ollama


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


RAG Application using Ollama and Langchain

In [2]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.embeddings import OllamaEmbeddings
from langchain_chroma import Chroma
from langchain_ollama import ChatOllama

In [3]:
raw_documents = TextLoader("./LangchainRetrieval.txt").load()

In [4]:
from langchain.document_loaders import PyPDFLoader

# Load PDF files
loader_1 = PyPDFLoader("./Context.pdf")
#loader_2 = PyPDFLoader("./Herbs that synergize with Carboplatin and Cisplatin V3.pdf")
#loader_3 = PyPDFLoader("./McKinney Surgery Pre and Post Op protocol.pdf")

# Load the documents
raw_documents_1 = loader_1.load()
#raw_documents_2 = loader_2.load()
#raw_documents_3 = loader_3.load()

# Combine all documents into one list
all_raw_documents = raw_documents_1


In [5]:
#raw_documents= all_raw_documents
#raw_documents= ""
raw_documents = TextLoader("./context.txt").load()

In [6]:
raw_documents

[Document(metadata={'source': './context.txt'}, page_content="ï»¿* P-value: The p-value is a measure that helps scientists determine whether their findings are significant or just due to chance. A low p-value (usually less than 0.05) suggests that the results are likely not random and are worth paying attention to.\n* Confidence Interval: A confidence interval gives a range of values within which the true value of something (like an average or effect) is expected to lie. For example, if a study says the average height is 170 cm with a 95% confidence interval of 160-180 cm, it means we can be 95% sure the true average height is between 160 and 180 cm.\n* Odds Ratio: The odds ratio is a way to compare whether the odds of a certain event happening are the same for two groups. For example, if you're comparing the odds of getting a disease in people who smoke versus those who don't, an odds ratio of 2 would mean smokers are twice as likely to get the disease.\n* Correlation Coefficient: The

In [7]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=20)
documents = text_splitter.split_documents(raw_documents)

In [8]:
len(documents)

28

In [9]:
print(documents[0])
print(documents[1])

page_content='ï»¿* P-value: The p-value is a measure that helps scientists determine whether their findings are significant or just due to chance. A low p-value (usually less than 0.05) suggests that the results are likely not random and are worth paying attention to.' metadata={'source': './context.txt'}
page_content='* Confidence Interval: A confidence interval gives a range of values within which the true value of something (like an average or effect) is expected to lie. For example, if a study says the average height is 170 cm with a 95% confidence interval of 160-180 cm, it means we can be 95% sure the true' metadata={'source': './context.txt'}


In [10]:
from langchain_ollama import OllamaEmbeddings

In [11]:
oembed = OllamaEmbeddings(base_url="http://localhost:11434", model="nomic-embed-text")

In [12]:
db = Chroma.from_documents(documents, embedding=oembed)

In [13]:
query = "Summarize the article about Carboplatin usage in cancer treatment."
docs = db.similarity_search(query)

In [14]:
len(docs)

4

In [15]:
print(docs[3].page_content)

* Absolute Risk Reduction: The difference in risk of an outcome between two groups, showing the actual reduction in risk due to the treatment. Example: If 10% of patients on the old drug experience side effects and only 5% on the new drug do, the ARR is 5%.


In [16]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [17]:
template = """Prompt:
Role and Context
You are an AI assistant with combined expertise equivalent to a Ph.D. in medical sciences and an MD specializing in oncology. Your primary goal is to 
assist cancer patients by summarizing complex medical and scientific articles from sources like PubMed or other online medical journals into clear,
concise, and easy-to-understand language. Patients will provide you with URLs/links to articles or directly paste article text for summarization.
Summarization Instructions
When summarizing:
Convert complex medical jargon into plain language suitable for laypersons.
Clearly outline key findings, conclusions, and implications relevant to cancer patients.
Ensure the summary is accurate, concise, and informative.
Use the context, provided as a separate file, for further explaining complex medical terms like P-value, Confidence Interval, Correlation Coefficient,
Hazard ratio etc. 
Ethical and Professional Considerations
Clearly indicate where information provided is general guidance versus content directly extracted from the specific article.
Provide balanced insights to support informed decision-making by patients but explicitly advise consultation with healthcare professionals before 
acting upon recommendations.
Overall Goal
Empower cancer patients by translating scientific literature into actionable knowledge, supporting informed decisions about treatment options,
supplementary therapies, and overall health management.
Please do not show your thinking process only the summary






Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

In [18]:
model = ChatOllama(
    model="llama3.1:latest",
    temperature=0
)

In [19]:
retriever = db.as_retriever()

In [20]:
def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])

In [21]:
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [22]:
chain.invoke("https://pmc.ncbi.nlm.nih.gov/articles/PMC10046228/")

'**Summary of Article:**\n\nTitle: "The Role of Omega-3 Fatty Acids in Cancer Prevention and Treatment"\n\nThis article explores the potential benefits of omega-3 fatty acids in preventing and treating cancer. The authors conducted a comprehensive review of existing research on the topic, analyzing data from 22 clinical trials involving over 1,500 participants.\n\n**Key Findings:**\n\n* Omega-3 fatty acids, particularly EPA and DHA, have been shown to inhibit cancer cell growth and induce apoptosis (cell death) in various types of cancer, including breast, prostate, and colon cancer.\n* The authors found a significant reduction in cancer risk among individuals with higher omega-3 intake, suggesting a potential preventive effect.\n* Omega-3 fatty acids also demonstrated anti-inflammatory properties, which may contribute to their anticancer effects.\n\n**Implications for Cancer Patients:**\n\nWhile the results are promising, it\'s essential to note that this study is based on observation