In [2]:
import os


def set_key_if_missing(name, placeholder):
    """Set environment variable ``name`` to ``placeholder`` only when it is unset.

    An existing value (for example a real key exported before the kernel was
    started) is never overwritten.

    Returns the value that ends up in ``os.environ[name]``.
    """
    return os.environ.setdefault(name, placeholder)


# API keys: replace the placeholders below with your own keys, or export the
# real keys in the environment before starting the kernel. Do not commit real
# keys to the notebook.
set_key_if_missing("LLAMA_CLOUD_API_KEY", "your_llama_api_key")
set_key_if_missing("OPENAI_API_KEY", "your_openai_api_key")
set_key_if_missing("CO_API_KEY", "your_cohere_api_key")

In [3]:
!pip install -qU langchain_experimental langchain_openai langchain_community langchain chromadb fastembed openai llama-parse rank_bm25 cohere



[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.2/209.2 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.0/69.0 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m38.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m30.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.3/19.3 MB[0m [31m71.0 MB/s[0m eta [36m0:00:00[

In [12]:
# Parse the PDF with LlamaParse, asking for markdown output
from llama_parse import LlamaParse

pdf_parser = LlamaParse(result_type="markdown")
medical_docs = pdf_parser.load_data("/content/RAG_PDF.pdf")

# Collect the text carried by each parsed document
text_contents = []
for parsed_doc in medical_docs:
    text_contents.append(parsed_doc.text_resource.text)


Started parsing the file under job_id 6de5acc6-2d33-4561-811f-abb7d91ff32c


In [13]:
# Step 2: wrap every extracted text in a LangChain Document
from langchain.schema import Document

text_documents = []
for page_text in text_contents:
    text_documents.append(Document(page_content=page_text))

In [14]:
# Step 3: split the documents into overlapping chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

# 800-character chunks, with 200 characters shared between neighbours
splitter_options = {"chunk_size": 800, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
chunks = text_splitter.split_documents(text_documents)


In [15]:
# Step 4 Embed and index chunks using Chroma
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.vectorstores import Chroma

embed_model = FastEmbedEmbeddings(model_name="BAAI/bge-base-en-v1.5")

# Build the index exactly once: a second identical from_documents() call
# re-embeds every chunk and can insert duplicate entries into the collection,
# which then surface as repeated hits from the vector retriever.
vectorstore = Chroma.from_documents(chunks, embedding=embed_model)

In [16]:
# Step 5 Build hybrid retriever (dense vector search + BM25 keyword search)
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever

# Dense retriever over the chunk embeddings; returns the 2 closest chunks.
vector_retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

# Keyword retriever over the SAME chunks the vector store indexes, so both
# retrievers return passages of the same granularity (indexing the unchunked
# documents here would return whole documents as "chunks").
keyword_retriever = BM25Retriever.from_documents(chunks, k=1)

# Weighted fusion of both result lists: 80% vector, 20% keyword.
hybrid_retriever = EnsembleRetriever(
    retrievers=[vector_retriever, keyword_retriever],
    weights=[0.8, 0.2]
)


In [17]:
# Step 6 Query system
query = "What does my LDL cholesterol level mean?"
hybrid_chunks = hybrid_retriever.invoke(query)

# Combine context: join the text of each retrieved chunk, each followed by the
# "@@@@@" separator. Use page_content rather than formatting the Document
# object itself, which would embed its "page_content=... metadata=..." wrapper
# into the context.
context = "".join(f"{chunk.page_content}\n@@@@@" for chunk in hybrid_chunks)


In [18]:
# Step 7 Rerank with Cohere
import cohere
co = cohere.Client()  # API key set via os.environ

# Send only the chunk text to the reranker; str(chunk) would also include the
# Document wrapper ("page_content=... metadata=...") and skew relevance scoring.
docs = [chunk.page_content for chunk in hybrid_chunks]

if docs:
    # Name the rerank model explicitly instead of relying on a server-side
    # default (some SDK versions require the argument).
    rerank_response = co.rerank(
        model="rerank-english-v3.0",
        query=query,
        documents=docs,
        top_n=3,
    )
    # Each result carries the index of the document in `docs`, best match first.
    reranked_chunks = [docs[r.index] for r in rerank_response.results]
else:
    # Nothing was retrieved, so there is nothing to rerank.
    reranked_chunks = []

In [19]:
# Concatenate the reranked chunks, each followed by the "@@@@@" separator
rerank_context = "".join(f"{passage}\n@@@@@" for passage in reranked_chunks)


In [None]:
# Step 8 Generate answer using OpenAI
from langchain_openai import ChatOpenAI

# System prompt: restrict the model to the supplied report context.
system_message = """
You are a medical assistant. Only answer based on the medical report provided in the context.
Do not use external medical knowledge or assumptions. If the answer is not in the report, say "I don't know."
"""

# User prompt: the question followed by the reranked context passages.
human_message = f"Query: {query}\n\nContext: {rerank_context}"

messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": human_message}
]

# temperature=0 keeps the answer as deterministic as the API allows.
llm = ChatOpenAI(temperature=0)
response = llm.invoke(messages)

# Print only the answer text; printing `response` itself dumps the whole
# message object, including its metadata fields.
print(response.content)