In [1]:
import os
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PDFPlumberLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter  # Fast chunker for large docs
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Set your LM Studio endpoint
os.environ["OPENAI_API_BASE"] = "http://localhost:1234/v1"
os.environ["OPENAI_API_KEY"] = "lm-studio"

# Load and process PDF (PDFPlumberLoader is known for speed, but optionally use multiprocessing for faster load)
loader = PDFPlumberLoader("Basic_Home_Remedies.pdf")
docs = loader.load()
print("Pages loaded:", len(docs))

# Fast chunking: RecursiveCharacterTextSplitter is much faster than SemanticChunker
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
documents = text_splitter.split_documents(docs)

# Batch embedding for faster vectorization (if supported)
embedder = HuggingFaceEmbeddings()
vector = FAISS.from_documents(documents, embedder)  # FAISS defaults to Flat, switch to HNSW for big docs if needed
retriever = vector.as_retriever(search_type="similarity", search_kwargs={"k": 2})

# Initialize LLM
llm = ChatOpenAI(
    model="tinyllama-1.1b-chat-v1.0",
    temperature=0.7,
    openai_api_base=os.environ["OPENAI_API_BASE"],
    openai_api_key=os.environ["OPENAI_API_KEY"],
    request_timeout=60,
    verbose=True
)

prompt = """
You are a domain expert assistant.
Use the provided context to answer the question clearly and accurately.
If the answer cannot be found in the context, say "The information is not available in the provided context."
Provide a well-structured answer in 3–4 sentences and keep it factual.

Context:
{context}

Question:
{question}

Answer:
"""

QA_CHAIN_PROMPT = PromptTemplate.from_template(prompt)
llm_chain = LLMChain(llm=llm, prompt=QA_CHAIN_PROMPT, verbose=True)

document_prompt = PromptTemplate(
    input_variables=["page_content", "source"],
    template="Context:\ncontent:{page_content}\nsource:{source}",
)
combine_documents_chain = StuffDocumentsChain(
    llm_chain=llm_chain,
    document_variable_name="context",
    document_prompt=document_prompt,
    callbacks=None,
)

qa = RetrievalQA(
    combine_documents_chain=combine_documents_chain,
    retriever=retriever,
    return_source_documents=True,
    verbose=True,
)

# Example query
result = qa("write about sleep hygiene")
print("Answer:", result["result"])


Pages loaded: 3


  embedder = HuggingFaceEmbeddings()
  embedder = HuggingFaceEmbeddings()
  llm = ChatOpenAI(
  llm_chain = LLMChain(llm=llm, prompt=QA_CHAIN_PROMPT, verbose=True)
  combine_documents_chain = StuffDocumentsChain(
  qa = RetrievalQA(
  result = qa("write about sleep hygiene")




[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
You are a domain expert assistant.
Use the provided context to answer the question clearly and accurately.
If the answer cannot be found in the context, say "The information is not available in the provided context."
Provide a well-structured answer in 3–4 sentences and keep it factual.

Context:
Context:
content:3.4 Sleep Hygiene
 Maintain a consistent sleep schedule (7-9 hours per night).
 Avoid screens at least 1 hour before bedtime.
 Keep the bedroom cool and dark for better sleep quality.
3.5 Detox & Immunity Boosting
 Start the day with warm lemon water for detoxification.
 Consume garlic and ginger to strengthen immunity.
 Eat fermented foods (yogurt, kimchi) for gut health.
3.6 Avoiding Harmful Habits
 Quit smoking and limit alcohol consumption.
 Reduce intake of caffeinated and sugary drinks.
 Avoid prolonged sitting; take breaks and move aro

In [2]:
!pip list

Package                   Version

You should consider upgrading via the 'C:\Users\ektas\OneDrive\Desktop\virtual env\ekfile\Scripts\python.exe -m pip install --upgrade pip' command.



------------------------- -----------
aiohappyeyeballs          2.6.1
aiohttp                   3.13.2
aiosignal                 1.4.0
annotated-doc             0.0.3
annotated-types           0.7.0
anyio                     4.11.0
argon2-cffi               25.1.0
argon2-cffi-bindings      25.1.0
arrow                     1.4.0
asttokens                 3.0.0
async-lru                 2.0.5
async-timeout             4.0.3
attrs                     25.4.0
babel                     2.17.0
beautifulsoup4            4.14.2
bleach                    6.3.0
certifi                   2025.10.5
cffi                      2.0.0
charset-normalizer        3.4.4
click                     8.3.0
colorama                  0.4.6
comm                      0.2.3
cryptography              46.0.3
dataclasses-json          0.6.7
debugpy                   1.8.17
decorator                 5.2.1
defusedxml                0.7.1
distro                    1.9.0
exceptiongroup            1.3.0
executing           