# Load Documents

In [155]:
# Read key=value pairs from a local .env file into the process environment.
# NOTE(review): presumably this supplies the API keys for the optional hosted-LLM
# cells further down — confirm which variables are actually required.
from dotenv import load_dotenv
load_dotenv()

True

In [None]:
import os

# Step one directory up so that the relative paths used by later cells
# ("data/", "vector_store") resolve from there (presumably the project root).
#
# os.chdir("../") is relative to the *current* directory, so an unguarded
# re-run of this cell would climb one more level every time and silently break
# those paths. The PROJECT_ROOT sentinel makes the cell idempotent within a
# kernel session; on a fresh kernel it behaves exactly as before.
if "PROJECT_ROOT" not in globals():
    os.chdir("../")
    PROJECT_ROOT = os.getcwd()

In [26]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [27]:
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan. Only entries matching the
            ``*.pdf`` glob are handed to the loader.

    Returns:
        The list produced by ``DirectoryLoader.load()``, with each file parsed
        by ``PyPDFLoader``.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [28]:
# Load the PDFs from the data/ folder (path is relative to the current working directory).
extracted_data = load_pdf_file(data="data/")
# Last expression of the cell: shows how many documents were loaded.
len(extracted_data)

637

# Split Chunks

In [29]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller, slightly overlapping chunks.

    Args:
        extracted_data: Documents to split (e.g. the list returned by
            ``load_pdf_file``).
        chunk_size: Maximum chunk size handed to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value
            that was previously hard-coded.
        chunk_overlap: Overlap between consecutive chunks. Defaults to 20,
            the value that was previously hard-coded.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    # Chunking parameters are arguments so they can be tuned per call without
    # editing the function; existing callers keep the old behavior.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [30]:
# Break the loaded documents into smaller chunks; these are what gets embedded later.
text_chunks=text_split(extracted_data)
# Last expression of the cell: shows how many chunks were produced.
len(text_chunks)

5859

# Embeddings

In [None]:
# LangChain LlamaCppEmbeddings has issues loading gguf embedding models
# from langchain_community.embeddings import LlamaCppEmbeddings
# embeddings = LlamaCppEmbeddings(
#     model_path="../models/nomic-embed-text-v1.5.Q4_K_M.gguf",
#     n_ctx=2048,        # llama.cpp default for this model; increase only with rope scaling
#     n_threads=4,       # set to CPU cores
#     n_batch=512        # batch size in tokens, safe default
# )

In [72]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Sentence-embedding model used for both indexing and querying.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={
        "device": "cpu",  # or "cuda" if you have a GPU
        # Must be a real boolean. The previous value was the *string* "True",
        # which only worked because any non-empty string is truthy (the string
        # "False" would have enabled the flag as well).
        # NOTE(review): this flag allows code shipped in the model repository
        # to run at load time; it is presumably unnecessary for this stock
        # sentence-transformers model — consider dropping it.
        "trust_remote_code": True,
    },
)

## Store embeddings into FAISS local database

In [73]:
from langchain_community.vectorstores import FAISS
# Build a FAISS vector store from the text chunks, using `embeddings` to vectorize them.
db = FAISS.from_documents(text_chunks, embeddings)
# Persist the store to the "vector_store" folder (relative to the current working directory).
db.save_local("vector_store")

In [74]:
# Test embeddings: embed one short query and print the length of the returned
# vector, i.e. the dimensionality of the embedding model's output.
query_result = embeddings.embed_query("Hello World")
print(len(query_result))

384


In [75]:
# Expose the FAISS store as a retriever: similarity search, k=3 results requested per query.
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k":3})

In [76]:
# Smoke-test retrieval with a sample question; as the cell's last expression,
# the retrieved documents are displayed as its output.
retriever.invoke("What is anemia?")

[Document(id='660bc947-7e9a-4a53-8250-3b4c8d1e466f', metadata={'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'creator': 'PyPDF', 'creationdate': '2004-12-18T17:00:02-05:00', 'moddate': '2004-12-18T16:15:31-06:00', 'source': 'data/Gale Encyclopedia of Medicine Vol. 1 (A-B).pdf', 'total_pages': 637, 'page': 193, 'page_label': '194'}, page_content='Anemias\nDefinition\nAnemia is a condition characterized by abnormally\nlow levels of healthy red blood cells or hemoglobin (the\ncomponent of red blood cells that delivers oxygen to tis-\nsues throughout the body).\nDescription\nThe tissues of the human body need a regular supply\nof oxygen to stay healthy. Red blood cells, which contain\nhemoglobin that allows them to deliver oxygen throughout\nthe body, live for only about 120 days. When they die, the'),
 Document(id='147f4136-bc24-48f7-b6af-db816c0eef33', metadata={'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'creator': 'PyPDF', 'creationdate': '2004-12-18T17:00:02-05:00', 'moddate': '2004-12-18T16:15

# LLM

In [None]:
# Local LLM: LangChain's llama.cpp wrapper pointed at a GGUF model file on disk.
# NOTE(review): model_path is relative to the current working directory, which an
# earlier cell changes with os.chdir("../") — confirm it still resolves to the
# models folder.
from langchain_community.llms import LlamaCpp

llm = LlamaCpp(
    model_path="../models/Llama-3.2-3B-Instruct-Q2_K.gguf",
    n_ctx=2048,
    n_batch=512,
    temperature=0.3,
    verbose=False,
)

In [None]:
# Using LangChain OpenAI (gpt-4o-mini is a chat model, so it needs the chat wrapper)
# from langchain_openai import ChatOpenAI
# llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.3)

In [None]:
# Using LangChain Gemini
# from langchain_google_genai import ChatGoogleGenerativeAI
# llm = ChatGoogleGenerativeAI(
#     model="gemini-2.5-flash",
#     temperature=0.3
# )

E0000 00:00:1759262720.393554 11827410 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Generic chat-style prompt: the system message carries the retrieved context,
# the human message carries the user's question.
# NOTE(review): a later cell assigns `prompt` again, so this definition only
# takes effect if that cell is not run afterwards.
system_prompt = (
    # The two literals are joined by implicit concatenation, so the separator
    # has to be written explicitly; without it the context text is glued
    # directly onto the colon.
    "Answer the question based only on the following context:\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [136]:
from langchain_core.prompts import PromptTemplate

# Prompt written out by hand in the Llama 3 instruct format (special tokens
# included), for use with a plain text-completion LLM such as LlamaCpp.
#
# Two things matter here:
# * The template is built from adjacent string literals rather than an indented
#   triple-quoted block, so no stray leading spaces or blank lines end up
#   around the special tokens.
# * It is wrapped in PromptTemplate, not ChatPromptTemplate. A chat template
#   rendered for a text-completion LLM is flattened to "Human: <content>",
#   which would put "Human: " in front of <|begin_of_text|>.
#
# NOTE(review): llama.cpp may prepend its own BOS token; if it warns about a
# duplicate BOS, drop the leading <|begin_of_text|>.
prompt_template = (
    "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
    "You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise."
    "<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
    "Question: {input}\n"
    "Context: {context}\n"
    "Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
)
# Input variables are still {input} and {context}, as the retrieval chain expects.
prompt = PromptTemplate.from_template(prompt_template)

In [157]:
# Chain that combines the retrieved documents into the prompt and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# Full RAG pipeline: takes a dict with an "input" question, runs the retriever,
# then the question-answering chain; the generated text comes back under "answer".
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [143]:
# Stream the answer piece by piece, echoing each piece as soon as it arrives
# while also collecting the complete text in `full_response`.
full_response = ""
for event in rag_chain.stream({"input": "What is anemia?"}):
    # Streamed items without an 'answer' key carry no generated text; skip them.
    if "answer" not in event:
        continue
    piece = event["answer"]
    print(piece, end="", flush=True)
    full_response += piece

 Here is the answer to your question about anemia.

Anemia is a medical condition characterized by abnormally low levels of healthy red blood cells or hemoglobin.

In [None]:
# Non-streaming call: run the whole chain once and wait for the complete result.
response = rag_chain.invoke({"input": "What is acne?"})
# The generated text is stored under the "answer" key of the result.
print(response["answer"])

I don't know what XYZ is, as the provided context does not contain any information about it.
