### Environment Setup

In [1]:
import os
from dotenv import load_dotenv

# Load environment variables from the .env file
load_dotenv()

# Keys this notebook expects to find in the environment after load_dotenv()
REQUIRED_ENV_VARS = ('LANGCHAIN_API_KEY', 'HUGGINGFACE_API_KEY')

# Fail fast with a readable message. Assigning a missing value (None) to
# os.environ would otherwise raise an opaque "str expected, not NoneType".
missing_env_vars = [name for name in REQUIRED_ENV_VARS if os.getenv(name) is None]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Define them in your .env file or shell environment."
    )

# Keep the keys available as plain variables for later cells
langchain_api_key = os.environ['LANGCHAIN_API_KEY']
huggingface_api_key = os.environ['HUGGINGFACE_API_KEY']

# Set environment variables for the application.
# The two API keys are already present in os.environ at this point, so only
# the tracing flag has to be written.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'

In [2]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import OllamaLLM

## Load Multiple PDF Documents

In [3]:
import fitz  # PyMuPDF

# Raw strings keep the Windows backslashes literal; in a normal string a
# segment such as "\n..." or "\t..." would silently become a control character.
# NOTE(review): these are absolute, machine-specific paths; consider making
# them relative to a configurable data directory.
pdf_files = [
    r"E:\CSE299-Junior-Design-Project\LLM-1\PDFs\Diabetes_Care_BADAS_guideline2019-3.pdf",
    r"E:\CSE299-Junior-Design-Project\LLM-1\PDFs\Insulin-Guideline-min.pdf",
    r"E:\CSE299-Junior-Design-Project\LLM-1\PDFs\Textbook-of-Diabetes-2024.pdf",
]
pdf_texts = []

for pdf in pdf_files:
    # The context manager closes the document (and its file handle) as soon
    # as the text of all pages has been extracted.
    with fitz.open(pdf) as doc:
        # Join once instead of growing a string page by page
        pdf_texts.append("".join(page.get_text() for page in doc))

# Print only the first 500 characters of each PDF
for pdf, text in zip(pdf_files, pdf_texts):
    print(f"Text from {pdf} (first 500 chars):\n", text[:500], "\n" + "="*50)

Text from E:\CSE299-Junior-Design-Project\LLM-1\PDFs\Diabetes_Care_BADAS_guideline2019-3.pdf (first 500 chars):
   
DIABETES CARE 
BADAS Guideline 2019 
    
  
    
  
   
  
   
P|) DAS GUELINE ON Man 
DELIT 
IGEMEN 
  
A Joint Initiative of 
Diabetic Association of Bangladesh 
NCDC Program, Directorate General of Health Services
  
  
DIABETES CARE 
BADAS Guideline 2019 
  
A Joint Initiative of 
Diabetic Association of Bangladesh 
NCDC Program, Directorate General of Health Services 
  
Diabetes Care: BADAS Guideline 2019 HEI! 
1 
 
  
  
  
DIABETES CARE: BADAS GUIDELINE 2019 
Convener: Prof A K Azad  
Text from E:\CSE299-Junior-Design-Project\LLM-1\PDFs\Insulin-Guideline-min.pdf (first 500 chars):
 Bangladesh Endocrine Society (BES)
Insulin Guideline
First Edition 2018
Reprint 2019
All rights reserved by: Bangladesh Endocrine Society (BES)
Published by
Bangladesh Endocrine Society (BES)
Website: http://bes-org.net
E-mail: endobd2012@gmail.com
Bangladesh Endocrine Society (BES) In

### Split All the Documents into Chunks

In [4]:
# Merge the text of every PDF into one newline-separated string
full_text = "\n".join(pdf_texts)

# Cut the merged text into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = text_splitter.split_text(full_text)

# Preview the first 3 chunks (at most 1000 characters of each)
for number, chunk in enumerate(chunks[:3], start=1):
    print(f"\n--- Chunk {number} ---")
    print(chunk[:1000])
    print("\n" + "-" * 70 + "\n")


--- Chunk 1 ---
DIABETES CARE 
BADAS Guideline 2019 
    
  
    
  
   
  
   
P|) DAS GUELINE ON Man 
DELIT 
IGEMEN 
  
A Joint Initiative of 
Diabetic Association of Bangladesh 
NCDC Program, Directorate General of Health Services
  
  
DIABETES CARE 
BADAS Guideline 2019 
  
A Joint Initiative of 
Diabetic Association of Bangladesh 
NCDC Program, Directorate General of Health Services 
  
Diabetes Care: BADAS Guideline 2019 HEI! 
1 
 
  
  
  
DIABETES CARE: BADAS GUIDELINE 2019 
Convener: Prof A K Azad Khan 
Chairman: Prof Hajera Mahtab 
Members of the steering committee 
Prof Dr AHM Enayet Hossain 
Prof Akhtar Hussain 
Prof Zafar Anmed Latif 
Prof Tofail Ahmed 
Prof Laique Ahmed Khan 
Prof Nazrul Islam Siddiqui 
Prof Md Hafizur Rahman 
Prof Abdus Saleque Mollah 
Prof Md Farid Uddin 
Prof M A Jalil Ansary 
Prof Dr MA Samad 
Prof SM Ashrafuzzaman 
Prof MA Hasnat 
Dr Kazi Ali Hassan 
Dr Abdul Mannan Sarker 
Members of the Task Force 
Prof Md Faruque Pathan 
Dr Tareen Ahmed 
Dr Md F

## Embedding

In [5]:
# Embedding model served through the HuggingFace integration
# NOTE(review): e5 models are usually documented as expecting "query: " /
# "passage: " prefixes on their inputs; confirm whether that matters here.
embedding_model = HuggingFaceEmbeddings(
    model_name="intfloat/e5-small-v2",
)

# Embed every chunk and index it in a Chroma vector store
vectorstore = Chroma.from_texts(texts=chunks, embedding=embedding_model)

# Expose the vector store through the retriever interface used by the chain
retriever = vectorstore.as_retriever()

## Chatbot Prompt & LLM Model

In [6]:
from langchain_core.prompts import PromptTemplate

# Prompt
# The template must contain the {context} and {question} placeholders: the RAG
# chain feeds the prompt a mapping with exactly those two keys, and without
# the placeholders neither the retrieved passages nor the user's question
# would be included in the text sent to the model.
prompt = PromptTemplate.from_template("""
You are a diabetes assistant for patients in Bangladesh. Provide concise, textbook-based advice on diabetes management. 
Keep responses short, direct, and actionable. Consider local diet, lifestyle, and healthcare practices. Avoid unnecessary details. 
If medical consultation is needed, advise accordingly.

Use the retrieved context below to answer the question.

Context:
{context}

Question: {question}

Answer:
""")

# LLM
llm = OllamaLLM(model="mistral")

In [7]:
# Post-processing
def format_docs(docs):
    """Render retrieved documents as a numbered, newline-separated list.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        A single string with one ``"<n>. <content>"`` entry per document,
        numbered from 1, where each content has surrounding whitespace
        stripped. An empty input yields an empty string.
    """
    numbered_entries = []
    for position, doc in enumerate(docs, start=1):
        content = doc.page_content.strip()
        numbered_entries.append(f"{position}. {content}")
    return "\n".join(numbered_entries)

## Retrieval & Generation

In [8]:
# Import from langchain_core.runnables, the same package the notebook already
# uses for RunnablePassthrough, instead of the legacy langchain.schema path.
from langchain_core.runnables import RunnableMap

# Chain
# 1. Build the prompt inputs: "context" is the retriever output rendered by
#    format_docs, "question" is the raw input passed through unchanged.
# 2. Fill the prompt, call the LLM, and parse the result to a plain string.
rag_chain = (
    RunnableMap({"context": retriever | format_docs, "question": RunnablePassthrough()})
    | prompt
    | llm
    | StrOutputParser()
)

In [9]:
# Ask the chain a sample question; the cell displays the returned answer
question = "How can I control my blood sugar level?"
rag_chain.invoke(question)

"1. Maintain a balanced diet: Include foods rich in fiber (e.g., whole grains, fruits, vegetables), lean proteins (fish, chicken), and healthy fats (nuts, avocado) while limiting sugary drinks, processed foods, and high-fat content items.\n  2. Regular exercise: Aim for 30 minutes of moderate-intensity physical activity at least five days a week. This can include walking, cycling, or swimming.\n  3. Monitor blood sugar levels: Check your blood glucose regularly as prescribed by your healthcare provider.\n  4. Take medications as directed: If you're on insulin or oral diabetes medication, make sure to take it exactly as prescribed and never skip doses.\n  5. Regular check-ups: Visit your doctor regularly for routine check-ups to manage your diabetes effectively and prevent complications.\n  6. Control HbA1c levels: Strive to maintain an HbA1c level of less than 7%. Consult a healthcare professional if you struggle with this goal.\n  7. Manage stress: High stress can affect blood sugar l