In [1]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyMuPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from sentence_transformers import SentenceTransformer
#from langchain.lims import CTransformers
from transformers import AutoModelForCausalLM, AutoTokenizer

#Database
import chromadb


import pinecone
from dotenv import load_dotenv
import os
import pandas as pd
import torch
def configure() -> None:
    """Load environment variables by calling python-dotenv's `load_dotenv()`.

    The return value of `load_dotenv()` is discarded, so this function
    always returns None.
    """
    load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


Embedding model

In [2]:
# Run the embedding model on the GPU when CUDA is available and fall back to
# the CPU otherwise, so this cell does not crash on machines without a GPU.
EMBEDDING_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
embedding_model = SentenceTransformer("all-MiniLM-L6-v2", device=EMBEDDING_DEVICE)

Connecting to ChromaDB

In [3]:
# Open the persistent Chroma store under ./chroma_db and look up the existing
# "medical_chatbot" collection that the retrieval cells below query.
client = chromadb.PersistentClient(path="./chroma_db")
collection = client.get_collection("medical_chatbot")

Retrieve the Relevant Chunks from ChromaDB

In [4]:
def retrieve_relevant_chunks(query, top_k=3):
    """Return the stored documents most similar to `query`.

    The query is embedded with the module-level `embedding_model` and the
    resulting vector is used for a similarity search against the module-level
    Chroma `collection`.

    Args:
        query: Question text to search for.
        top_k: Number of nearest documents to request from the collection.

    Returns:
        The list of documents returned for the query, or an empty list when
        the search result carries no documents.
    """
    # Encode on whatever device the model was loaded on instead of forcing
    # "cuda", so retrieval also works when the model lives on the CPU.
    query_embedding = embedding_model.encode(query, convert_to_tensor=True).tolist()

    # Perform similarity search
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=top_k,
    )

    # One list of documents comes back per query embedding; exactly one
    # embedding was sent, so the first entry holds the top-k documents.
    # A missing or empty "documents" field yields an empty list rather than
    # an IndexError/TypeError.
    documents = results["documents"]
    return documents[0] if documents else []

# Example query: run the retriever on a sample question and print the
# documents it returns (default top_k, so up to three chunks).
query = "What are the symptoms of diabetes?"
retrieved_texts = retrieve_relevant_chunks(query)
print("Relevant Chunks:", retrieved_texts)

Relevant Chunks: ['Question/nWhat are the symptoms of Diabetes ?Answer/nMany people with diabetes experience one or more symptoms, including extreme thirst or hunger, a frequent need to urinate and/or fatigue. Some lose weight without trying. Additional signs include sores that heal slowly, dry, itchy skin, loss of feeling or tingling in the feet and blurry eyesight. Some people with diabetes, however, have no symptoms at all.', 'urination  - feeling very hungry or tired  - losing weight without trying  - having sores that heal slowly  - having dry, itchy skin  - loss of feeling or tingling in the feet  - having blurry eyesight. being very thirsty frequent urination feeling very hungry or tired losing weight without trying having sores that heal slowly having dry, itchy skin loss of feeling or tingling in the feet having blurry eyesight. Signs of type 1 diabetes usually develop over a short period of time. The signs for', 'Question/nWhat are the symptoms of Your Guide to Diabetes: Type

Loading Llama 2 (7B)

In [5]:
# Decide how many model layers to offload to the GPU.
# Heuristic: GPU_LAYERS_PER_GB layers per GiB of total device memory,
# capped at MAX_GPU_LAYERS.
MAX_GPU_LAYERS = 50
GPU_LAYERS_PER_GB = 5

if torch.cuda.is_available():
    gpu_mem = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)  # bytes -> GiB
    gpu_layers = min(MAX_GPU_LAYERS, int(gpu_mem * GPU_LAYERS_PER_GB))
else:
    # Without a CUDA device, querying device 0 raises; offload nothing and
    # let the model run entirely on the CPU.
    gpu_mem = 0.0
    gpu_layers = 0

In [6]:
# NOTE(review): local_model_path is not referenced by any cell visible here.
local_model_path = "E:/Project/LLM_HEALTH_LOCAL_DATA/Model/Model/"

# NOTE: this rebinds AutoModelForCausalLM, which the first cell imported from
# `transformers`; from here on the name refers to the ctransformers class.
from ctransformers import AutoModelForCausalLM

# Location of the quantized Llama 2 chat weights. Set the LLAMA_MODEL_PATH
# environment variable to point elsewhere without editing this cell; when it
# is unset the original hard-coded location is used.
LLAMA_MODEL_PATH = os.getenv(
    "LLAMA_MODEL_PATH",
    "E:/Project/LLM_HEALTH_LOCAL_DATA/Model/llama-2-7b-chat.ggmlv3.q4_0.bin",
)

# Load the GGML (ggmlv3) model file
model = AutoModelForCausalLM.from_pretrained(
    LLAMA_MODEL_PATH,
    model_type="llama",
    gpu_layers=gpu_layers,  # value computed from total GPU memory in the previous cell
)

# Generate text
response = model("What are the symptoms of diabetes?", max_new_tokens=200)
print(response)


 Unterscheidung between type 1 and type 2 diabetes? 




RAG Pipeline

In [6]:
import requests

# HTTP endpoint used for text generation (localhost:11434 `/api/generate` --
# presumably a locally running Ollama server; confirm against your setup).
url = "http://localhost:11434/api/generate"



In [None]:
def rag_pipeline(
    query,
    top_k=2,
    url="http://localhost:11434/api/generate",
    model_name="llama2",
    timeout=None,
):
    """Answer `query` with retrieval-augmented generation.

    The query is embedded with the module-level `embedding_model`, the most
    similar documents are fetched from the module-level Chroma `collection`,
    and a prompt made of those documents plus the question is POSTed to the
    generation endpoint at `url`.

    Args:
        query: The user's question.
        top_k: Number of documents to retrieve as context.
        url: Generation endpoint that receives the JSON request.
        model_name: Value sent as the "model" field of the request.
        timeout: Seconds to wait for the HTTP response; None waits
            indefinitely.

    Returns:
        The "response" field of the endpoint's JSON reply. The same text is
        also printed.

    Raises:
        RuntimeError: If the endpoint answers with a status other than 200.
    """
    # Step 1: Embed the query
    query_embedding = embedding_model.encode(query).tolist()

    # Step 2: Retrieve relevant documents from ChromaDB
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=top_k,
    )
    retrieved_docs = results["documents"][0] if results["documents"] else []

    # Step 3: Prepare the context; tell the model explicitly when nothing
    # was retrieved instead of sending an empty context.
    if retrieved_docs:
        context = "\n".join(retrieved_docs)
    else:
        context = "No relevant documents were found in the database."

    # Step 4: Generate the response
    prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
    payload = {
        "model": model_name,
        "prompt": prompt,
        "stream": False,
    }

    # `json=` serialises the payload and sets the JSON Content-Type header.
    response = requests.post(url, json=payload, timeout=timeout)

    # Fail loudly on an unsuccessful call rather than falling through to an
    # unbound result variable.
    if response.status_code != 200:
        raise RuntimeError(
            f"Generation request to {url} failed with status "
            f"{response.status_code}: {response.text}"
        )

    answer = response.json()["response"]
    print(answer)
    return answer

# Smoke test: run the RAG pipeline end to end on a sample question and print
# the returned answer.
query = "I feel like vomiting?"
response = rag_pipeline(query)
print("RAG Response:", response)

In [13]:
import streamlit as st

# NOTE: Streamlit apps are meant to be launched with `streamlit run <script>.py`;
# executed from a notebook kernel this cell only emits Streamlit warnings.
st.title("💡 Medical Chatbot with LLAMA 2 & ChromaDB")

user_query = st.text_input("Ask a medical question:")

if st.button("Get Answer"):
    # Treat whitespace-only input like an empty question.
    if user_query.strip():
        # Send the text the user entered, not the notebook-level `query`
        # variable left over from the earlier test cell.
        response = rag_pipeline(user_query)
        st.write(response)
    else:
        st.warning("Please enter a question!")

2025-03-05 01:33:14.014 
  command:

    streamlit run C:\Users\rayan\AppData\Roaming\Python\Python311\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-03-05 01:33:14.018 Session state does not function when running a script without `streamlit run`
