## Environment Setup

In [80]:
import os
from dotenv import load_dotenv

# Load environment variables from the .env file
load_dotenv()

# Ensure the environment variables are set
langchain_api_key = os.getenv('LANGCHAIN_API_KEY')
huggingface_api_key = os.getenv('HUGGINGFACE_API_KEY')

# Set environment variables for the application
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_API_KEY'] = langchain_api_key
os.environ['HUGGINGFACE_API_KEY'] = huggingface_api_key

In [82]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import OllamaLLM

## Load PDF Document

In [84]:
# Load Documents (use PyPDFLoader for PDF)
file_path = r"/Users/saifmohammed/Downloads/Diabetes_Care_BADAS_guideline2019-3.pdf"
loader = PyPDFLoader(file_path)
docs = loader.load()

docs[0].page_content[:1000]

'DIABETES CARE \nBADAS Guideline 2019 \n          \n          P|) DAS GUELINE ON Man \nDELIT IGEMEN \n  A Joint Initiative of \nDiabetic Association of Bangladesh \nNCDC Program, Directorate General of Health Services'

## Split Document into Chunks

In [86]:
# Split the documents into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

for i, chunk in enumerate(splits[:3]):  # Show the first 3 chunks
    print(f"\n--- Chunk {i+1} ---")
    print(chunk.page_content[:1000])  # Print the first 1000 characters of the chunk
    print("\n" + "-"*70 + "\n")  # Separator between chunks


--- Chunk 1 ---
DIABETES CARE 
BADAS Guideline 2019 
          
          P|) DAS GUELINE ON Man 
DELIT IGEMEN 
  A Joint Initiative of 
Diabetic Association of Bangladesh 
NCDC Program, Directorate General of Health Services

----------------------------------------------------------------------


--- Chunk 2 ---
DIABETES CARE 
BADAS Guideline 2019 
  A Joint Initiative of 
Diabetic Association of Bangladesh 
NCDC Program, Directorate General of Health Services   
Diabetes Care: BADAS Guideline 2019 HEI! 1

----------------------------------------------------------------------


--- Chunk 3 ---
DIABETES CARE: BADAS GUIDELINE 2019 
Convener: Prof A K Azad Khan 
Chairman: Prof Hajera Mahtab 
Members of the steering committee 
Prof Dr AHM Enayet Hossain 
Prof Akhtar Hussain 
Prof Zafar Anmed Latif 
Prof Tofail Ahmed 
Prof Laique Ahmed Khan 
Prof Nazrul Islam Siddiqui 
Prof Md Hafizur Rahman 
Prof Abdus Saleque Mollah 
Prof Md Farid Uddin 
Prof M A Jalil Ansary 
Prof Dr MA Samad 
Prof SM

## Embedding

In [None]:
# Use HuggingFace Embeddings
embedding_model = HuggingFaceEmbeddings(model_name="intfloat/e5-small-v2")
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
retriever = vectorstore.as_retriever()

## Chatbot Prompt & LLM Model

In [70]:
from langchain_core.prompts import PromptTemplate

# Prompt
prompt = PromptTemplate.from_template("""
You are a diabetes assistant for patients in Bangladesh. Provide concise, textbook-based advice on diabetes management. 
Keep responses short, direct, and actionable. Consider local diet, lifestyle, and healthcare practices. Avoid unnecessary details. 
If medical consultation is needed, advise accordingly.
""")

# LLM
llm = OllamaLLM(model="mistral")

In [72]:
# Post-processing
def format_docs(docs):
    return "\n".join(f"{i+1}. {doc.page_content.strip()}" for i, doc in enumerate(docs))

## Retrival & Generation

In [76]:
from langchain.schema.runnable import RunnableMap

# Chain
rag_chain = (
    RunnableMap({"context": retriever | format_docs, "question": RunnablePassthrough()})
    | prompt
    | llm
    | StrOutputParser()
)

In [78]:
# Question
rag_chain.invoke("What steps you will suggest to control my sugar level?")

'1. Monitor Blood Sugar Levels: Regularly check blood sugar levels at home using a glucometer. Aim for before-meal (fasting) levels less than 140 mg/dL and after-meal (postprandial) levels less than 180 mg/dL.\n\n  2. Balanced Diet: Follow a balanced diet with appropriate portion sizes, focusing on complex carbohydrates, lean proteins, fruits, vegetables, and whole grains. Limit simple sugars, saturated fats, and sodium intake.\n\n  3. Physical Activity: Engage in at least 30 minutes of moderate-intensity physical activity daily, such as walking or cycling. Consult a healthcare professional for guidance on suitable activities.\n\n  4. Medication Adherence: Take prescribed medication as directed by your doctor to manage blood sugar levels effectively. If you have any concerns about medications, consult with your healthcare provider.\n\n  5. Regular Check-ups: Schedule regular appointments with your healthcare team for monitoring and adjusting treatment plans as needed.\n\n  6. Foot Care