In [1]:
import os
import sys

# Make the project's modules importable from this notebook.
# The location can be overridden with the MEDICO_AI_BOT_ROOT environment variable so the
# notebook is not tied to one machine; the default is the original checkout location.
PROJECT_ROOT = os.environ.get("MEDICO_AI_BOT_ROOT", "/home/kap2403/Desktop/Medico-AI-Bot")

# Guard against duplicates: re-running this cell must not keep growing sys.path.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [2]:
import os
import toml
from typing import Optional
from dotenv import load_dotenv
from groq import Groq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
import logging
from langchain_groq import ChatGroq
from extract_metadata import Metadata


# Set up root logging (INFO level, "timestamp - level - message" layout) and create
# the module-level logger used by the code below.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

class Medibot:
    """Retrieval-augmented question answering bot.

    Combines a FAISS vector store (queried through an MMR retriever), a prompt
    template loaded from a TOML file, and a Groq-hosted chat model. After answering,
    it also resolves tables and images referenced by the retrieved documents through
    the project's ``Metadata`` helper.

    Attributes:
        api_key: Value of the ``GROQ_API_KEY`` environment variable.
        system_prompt: System prompt text read from the TOML config.
        user_prompt_template: User prompt template text read from the TOML config.
        prompt_template: ``ChatPromptTemplate`` built from the two prompts above.
        retriever: MMR retriever over the loaded FAISS index (``k=10``).
        model: ``ChatGroq`` chat model used to generate answers.
        metadata_extactor: ``Metadata`` instance used to look up referenced
            tables/images (attribute name kept as-is for backward compatibility).
    """

    def __init__(
        self,
        config_path: str = "/home/kap2403/Desktop/Medico-AI-Bot/src/bot/configs/prompt.toml",
        index_path: str = "/home/kap2403/Desktop/Medico-AI-Bot/faiss_index",
        metadata_path: str = "/home/kap2403/Desktop/Medico-AI-Bot/dataset/metadata.csv",
    ):
        """Initialize Medibot with configuration, retriever, and Groq chat model.

        Args:
            config_path: TOML file containing a ``[rag_prompt]`` table with
                ``system_prompt`` and ``user_prompt_template`` entries.
            index_path: Directory of the saved FAISS index to load.
            metadata_path: CSV file handed to the ``Metadata`` helper.

        Raises:
            ValueError: If ``GROQ_API_KEY`` is not set in the environment.
            FileNotFoundError: If ``config_path`` does not exist.
            toml.TomlDecodeError: If the config file is not valid TOML.
            KeyError: If the config lacks the expected ``rag_prompt`` entries.
        """
        # Load environment variables (e.g. from a local .env file)
        load_dotenv()
        self.api_key = os.environ.get("GROQ_API_KEY")
        if not self.api_key:
            logger.error("GROQ_API_KEY not found in environment variables")
            raise ValueError("GROQ_API_KEY is required")

        # Load prompt configuration. KeyError is included so that a config file with
        # missing entries is logged the same way as a missing or malformed file.
        try:
            config = toml.load(config_path)
            self.system_prompt = config["rag_prompt"]["system_prompt"]
            self.user_prompt_template = config["rag_prompt"]["user_prompt_template"]
        except (FileNotFoundError, toml.TomlDecodeError, KeyError) as e:
            logger.error(f"Failed to load config from {config_path}: {e}")
            raise

        # Initialize prompt template
        self.prompt_template = ChatPromptTemplate.from_messages([
            ("system", self.system_prompt),
            ("user", self.user_prompt_template),
        ])

        # Initialize vector database.
        # SECURITY NOTE: allow_dangerous_deserialization=True lets FAISS.load_local
        # unpickle the stored docstore, which can execute arbitrary code. Only point
        # index_path at an index you created yourself or otherwise trust.
        embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
        vector_store = FAISS.load_local(
            index_path,
            embeddings,
            allow_dangerous_deserialization=True,
        )
        # Maximal-marginal-relevance search returning 10 documents per query.
        self.retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 10})

        # Initialize Groq chat model (temperature 0 for the most deterministic output)
        self.model = ChatGroq(
            model="llama-3.1-8b-instant",
            temperature=0,
            max_tokens=None,
            timeout=None,
            max_retries=2,
        )

        self.metadata_extactor = Metadata(metadata_path)

    def query(self, question: str) -> tuple:
        """Answer ``question`` using retrieved context and return supporting material.

        Args:
            question: The user's question in plain text.

        Returns:
            A 4-tuple ``(answer, retrieved_docs, refered_tables, refered_images)``:
            the model's answer as a string, the documents returned by the retriever,
            and the two values returned by ``Metadata.get_data_from_ref`` for those
            documents.
        """
        # Retrieve once and reuse the same documents for both the prompt and the
        # table/image lookup, so the answer and its references stay consistent.
        retrieved_docs = self.retriever.invoke(question)

        rag_chain = self.prompt_template | self.model | StrOutputParser()

        # Pass the prompt variables directly. The previous RunnableParallel +
        # RunnablePassthrough wiring was invoked with {"question": question}, so the
        # whole dict (not the question text) was substituted into the prompt.
        answer = rag_chain.invoke({"context": retrieved_docs, "question": question})

        refered_tables, refered_images = self.metadata_extactor.get_data_from_ref(retrieved_docs)
        return answer, retrieved_docs, refered_tables, refered_images

In [3]:
# Instantiate the bot with its default prompt config; the constructor also loads the
# FAISS index and the metadata CSV, so this cell requires GROQ_API_KEY to be set.
model = Medibot()

2025-04-25 15:04:54,320 - INFO - Loading faiss with AVX2 support.
2025-04-25 15:04:54,861 - INFO - Successfully loaded faiss with AVX2 support.
2025-04-25 15:04:54,899 - INFO - Failed to load GPU Faiss: name 'GpuIndexIVFFlat' is not defined. Will not load constructor refs for GPU indexes.


In [4]:
# Run one end-to-end query; the bot returns the answer together with the retrieved
# documents and the tables/images resolved from them.
question = "How is tuberculosis diagnosed?"
answer, retrieved_docs, refered_tables, refered_images = model.query(question)

2025-04-25 15:05:34,389 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-04-25 15:05:35,966 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


In [5]:
# Show the tables that Metadata.get_data_from_ref resolved from the retrieved documents.
refered_tables

{'#/tables/280': '| Fagon et al. (15)   | Radiographic infiltrate + purulent sputum + fever or leukocytosis                        |   1.03 |\n|---------------------|------------------------------------------------------------------------------------------|--------|\n| Timset et al. (16)  | = Radiographic infiltrate + 2 of the following: fever, leukocytosis, or purulent sputum. |   0.96 |'}

In [6]:
# Show the images that Metadata.get_data_from_ref resolved from the retrieved documents.
# NOTE(review): the values appear to be base64-encoded image data, so this output can be
# very large -- consider displaying only the keys before sharing the notebook.
refered_images

{'#/pictures/744': 'iVBORw0KGgoAAAANSUhEUgAAAcsAAAFaCAIAAAD+WuH2AAEAAElEQVR4nOz92Y4ky5YliO1JRFTVBnePiHPOzaqsqQF290t9FUH+Ex/4wC8iCL4Q7K7KzJuZZ4jB3W1QVRn2wAeLc+6tahBsFEkQIFIeIizMXMMdbmpL9l57rSUYEfAv61/Wv6x/Wf+y/r+w6P/XP8C/rH9Z/7L+Zf3/7ZLHX1+/vv7866+UEpgNtVLy6H2apjFUmEnS0DZPx1q30+m4rus0Hc12ImAuvbeplL21nAtEAKMAjz4Ox+W23l6enrfrTaas7hwEjKqaczHTknMfjYWE02hjmqbb/T7N875tZcqj15QmC4MIRrZwIVTzAAIDKWn0FoijtZKLgybJt9v9cFjG6HnKoysAQkBEeDgAqFnJqe77NM2ttsNxud9vp+PpftuWw3Hbt2maujZTZcahmlM2tQggYned52lb92Va7vf7NM1tVBYyjTFUErl7Kbn3kVPprQJQYJgqIZkrsUOQKaVMQzuLjD6SpL1WYYYIYvGwiCAEU0dERACIACBiVUVARzNzDkHGiHAfCCiZVD0iwg0AAQgRESj80Zs4EXg4BAD8/kcAYgTg7/8KQor4/ttCAITvr0Y4IgYABGFAoH+/hgCBwvHxEyIiBCACOkRAQCABgAMCOAICEgEyM/S+ta1xCAIgBoBjIJITUQRCIFIgIAEgIhJDREQ8fioAJGIMCEBiMjMRAgQzBwAijPCAYM5oAGCA0NUhggg8zD0QMAIgMCLgcWcgBiBimPnjmyBgAAQBEIa6cPYADwXCVCTPiTgxTREQYKMpJwpAJglXIgIE4jTltO81ZzF3lhSgGAkCCMWxeXhOkw4thVvTaTr20XOaAMwUAIJFkMJUmbn3MS+lD82yqHZmCTThbDYCLKdc9zYtUx+jpOwKSGjuSWRoSym5gQPkRG4hIgDoEIhBiACQUjKDcEUEIEYURo8I

In [7]:
# Show the documents the retriever returned for the question (content plus metadata).
retrieved_docs

[Document(id='5bd89418-92ce-4c24-a1e9-a7cad41f37b6', metadata={'source': 'Diseases-of-Ear-Nose-and-Throat___Head-and-Neck-Surgery-Elsevier-India-2014', 'specilization': 'local_pdfs', 'chunk_index': 290, 'self_ref': '#/texts/7068', 'parent_ref': '#/body', 'child_ref': '', 'chunk_type': 'text'}, page_content='In the presence of secondary pyogenic infection, tubercular otitis media may be indistinguishable from chronic suppurative otitis media. Culture of ear discharge for tubercle bacilli, histopathological examination of granulations and X-ray chest, and other evidence of tuberculosis in the body help to confirm the diagnosis. Presently DNA probe and PCR (polymerase chain reaction) from the ear discharge can give early diagnosis in 3-7 days.'),
 Document(id='19c7cf71-f602-41ba-974c-33534a978010', metadata={'source': 'Diseases-of-Ear-Nose-and-Throat___Head-and-Neck-Surgery-Elsevier-India-2014', 'specilization': 'local_pdfs', 'chunk_index': 629, 'self_ref': '#/texts/13119', 'parent_ref': 

In [9]:
# Collect the "source" metadata entry of every retrieved document, then reduce the
# list to the distinct sources that contributed to the answer.
source_ref = [doc.metadata["source"] for doc in retrieved_docs]

unique_source_ref = set(source_ref)
print("Unique source references:", unique_source_ref)


Unique source references: {'Biochemistry-U-Satyanaryan-4th-Edition', 'Diseases-of-Ear-Nose-and-Throat___Head-and-Neck-Surgery-Elsevier-India-2014', 'marinos-the-icu'}
