In [2]:
from google.colab import drive
drive.mount('/content/gdrive')




Mounted at /content/gdrive


In [3]:
!pip install faiss-cpu sentence-transformers pandas tqdm

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_6

In [4]:
# Install required libraries
!pip install pandas tqdm




In [5]:
import json

# Path to the dataset in Google Drive
file_path = "/content/gdrive/MyDrive/dataset/Ai Doctor/ori_pqal.json"

# Load the JSON file
with open(file_path, "r") as file:
    data = json.load(file)

# Check the keys in the data dictionary to understand its structure


# Access elements by their actual keys, for example:
# Assuming '21645374' is a key in the data dictionary
if '21645374' in data:
    print(json.dumps(data['21645374'], indent=4))
else:
    print("Key '21645374' not found in the data.")




{
    "QUESTION": "Do mitochondria play a role in remodelling lace plant leaves during programmed cell death?",
    "CONTEXTS": [
        "Programmed cell death (PCD) is the regulated death of cells within an organism. The lace plant (Aponogeton madagascariensis) produces perforations in its leaves through PCD. The leaves of the plant consist of a latticework of longitudinal and transverse veins enclosing areoles. PCD occurs in the cells at the center of these areoles and progresses outwards, stopping approximately five cells from the vasculature. The role of mitochondria during PCD has been recognized in animals; however, it has been less studied during PCD in plants.",
        "The following paper elucidates the role of mitochondrial dynamics during developmentally regulated PCD in vivo in A. madagascariensis. A single areole within a window stage leaf (PCD is occurring) was divided into three areas based on the progression of PCD; cells that will not undergo PCD (NPCD), cells in ear

In [6]:
# Generate Embeddings Using Sentence-BERT

from sentence_transformers import SentenceTransformer
import numpy as np

# Use BioBERT or ClinicalBERT for better medical context
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Function to create embeddings
def generate_embeddings(texts, batch_size=64):
    """Generates embeddings for a list of texts."""
    embeddings = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]
        batch_embeddings = model.encode(batch)
        embeddings.append(batch_embeddings)
    return np.vstack(embeddings)



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [7]:
# Prepare the Data for Embedding:

# Prepare question + context pairs
texts = [f"{sample['QUESTION']} {sample.get('CONTEXTS', '')}"
         for key, sample in data.items()]  # Iterate through key-value pairs

# Generate embeddings
embeddings = generate_embeddings(texts)

# Check the shape of embeddings
print("Embedding shape:", embeddings.shape)

# Prepare question + context pairs


Embedding shape: (1000, 384)


In [8]:
#  Store Embeddings in FAISS

import faiss

# Define the dimension size (matching the embedding size)
dimension = embeddings.shape[1]

# Create FAISS index (L2 distance for similarity)
index = faiss.IndexFlatL2(dimension)

# Add embeddings to FAISS index
index.add(embeddings)

# Save FAISS index to Google Drive
faiss.write_index(index, "/content/gdrive/MyDrive/dataset/Ai Doctor/pubmedqa_faiss.index")

print(" Embeddings stored in FAISS!")


 Embeddings stored in FAISS!


"\nDefine the Retrieval Function\nWe'll create a function to:\n1️⃣ Accept the user's query.\n2️⃣ Generate embeddings for the query.\n3️⃣ Retrieve the top-k most relevant documents using FAISS.\n"

In [9]:
#  Load the FAISS Index

# Load the FAISS index from Google Drive
index_path = "/content/gdrive/MyDrive/dataset/Ai Doctor/pubmedqa_faiss.index"
index = faiss.read_index(index_path)

print(" FAISS index loaded successfully!")


 FAISS index loaded successfully!


In [10]:

'''
Define the Retrieval Function
We'll create a function to:
1️⃣ Accept the user's query.
2️⃣ Generate embeddings for the query.
3️⃣ Retrieve the top-k most relevant documents using FAISS.
'''

def retrieve_top_k(query, k=5):
    """
    Retrieves the top-k most similar documents from FAISS for a given query.

    Args:
    - query (str): User query
    - k (int): Number of top results to retrieve

    Returns:
    - List of top-k retrieved results with metadata
    """
    # Create embeddings for the query
    query_embedding = model.encode([query])

    # Search FAISS index
    distances, indices = index.search(query_embedding, k)

    # Retrieve top-k results
    results = []
    for i in range(len(indices[0])):
        # Convert index to integer
        index_value = int(indices[0][i])

        # Get the key corresponding to the index
        key = list(data.keys())[index_value]

        result = {
            "id": key,  # Use the key instead of the index
            "distance": distances[0][i],
            "question": data[key].get('QUESTION', data[key].get('question', '')),  # Access data using the key, fallback to 'question' if 'QUESTION' not found, then to an empty string if neither is found
            "context": data[key].get('CONTEXTS', ''),
            "answer": data[key].get('long_answer', '') # Use get method with a default value to handle missing 'long_answer'
        }
        results.append(result)

    return results



In [11]:
# Testing the Retrieval Function

query = "What are the symptoms of diabetes?"
top_k_results = retrieve_top_k(query, k=5)

# Display the top-k results
for i, res in enumerate(top_k_results):
    print(f"\n🔹 Result {i+1}")
    print(f"Question: {res['question']}")
    print(f"Context: {res['context']}")
    print(f"Distance: {res['distance']:.4f}")



🔹 Result 1
Question: Are lower fasting plasma glucose levels at diagnosis of type 2 diabetes associated with improved outcomes?
Context: ['Type 2 diabetes may be present for several years before diagnosis, by which time many patients have already developed diabetic complications. Earlier detection and treatment may reduce this burden, but evidence to support this approach is lacking.', 'Glycemic control and clinical and surrogate outcomes were compared for 5,088 of 5,102 U.K. Diabetes Prospective Study participants according to whether they had low (<140 mg/dl [<7.8 mmol/l]), intermediate (140 to<180 mg/dl [7.8 to<10.0 mmol/l]), or high (>or =180 mg/dl [>or =10 mmol/l]) fasting plasma glucose (FPG) levels at diagnosis. Individuals who presented with and without diabetic symptoms were also compared.', 'Fewer people with FPG in the lowest category had retinopathy, abnormal biothesiometer measurements, or reported erectile dysfunction. The rate of increase in FPG and HbA(1c) during the s

In [12]:
# Load GPT-2 Model

!pip install transformers accelerate




In [13]:
# RAG Pipeline with GPT-2

from transformers import pipeline

# Load GPT-2 for text generation
generator = pipeline("text-generation", model="gpt2")


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cuda:0


In [14]:
#  RAG Pipeline with GPT-2 Generation

'''
1️ Retrieval:

Retrieves top-k relevant documents from FAISS.
Uses the flattened context from FAISS retrieval.
2️ GPT-2 Generation:

Uses the retrieved context as a prompt.
Generates the answer using GPT-2.
Adds the result to the final output.

'''
def retrieve_and_generate(query, k=5, max_new_tokens=128): # Change max_tokens to max_new_tokens
    """
    Retrieves top-k relevant documents from FAISS and generates an answer using GPT-2.

    Args:
    - query (str): User query
    - k (int): Number of documents to retrieve
    - max_new_tokens (int): Maximum new tokens in the generated answer

    Returns:
    - List of dictionaries with question, context, and GPT-2 generated answer
    """
    # Retrieve top-k results from FAISS
    results = retrieve_top_k(query, k)

    # List to store the final output
    final_output = []

    # Generate answers using GPT-2
    for res in results:
        # Prepare the prompt for GPT-2
        prompt = f"""
        You are an AI medical assistant providing accurate responses based on medical literature.

        Context: {res['context']}

        Question: {res['question']}

        Provide a clear and accurate answer:
        """

        # Generate the answer using GPT-2
        # Use max_new_tokens instead of max_length
        response = generator(prompt, max_new_tokens=max_new_tokens, num_return_sequences=1)

        # Append the result to the final output
        final_output.append({
            "question": res['question'],
            "context": res['context'],
            "answer": response[0]["generated_text"],
            "distance": res['distance']
        })

    return final_output

In [23]:
# Testing the Full RAG + GPT-2 Pipeline

'''
Explanation:

query → Your medical question.
k=3 → Retrieves the top-3 documents.
Generates answers using GPT-2.
Displays the question, context, generated answer, and distance.
'''

query = "What are the symptoms of diabetes?"
full_results = retrieve_and_generate(query, k=3)

# Display the output
# print("\n RAG Pipeline Output with GPT-2 Generation:\n")
for i, res in enumerate(full_results):
    # print(f"🔹 Result {i+1}")
    print(f"Question: {res['question']}")
    # print(f"Context: {res['context']}")
    print(f"Generated Answer: {res['answer']}")
    # print(f"Distance: {res['distance']:.4f}")
    print("="*100)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Question: Are lower fasting plasma glucose levels at diagnosis of type 2 diabetes associated with improved outcomes?
Generated Answer: 
        You are an AI medical assistant providing accurate responses based on medical literature.
        
        Context: ['Type 2 diabetes may be present for several years before diagnosis, by which time many patients have already developed diabetic complications. Earlier detection and treatment may reduce this burden, but evidence to support this approach is lacking.', 'Glycemic control and clinical and surrogate outcomes were compared for 5,088 of 5,102 U.K. Diabetes Prospective Study participants according to whether they had low (<140 mg/dl [<7.8 mmol/l]), intermediate (140 to<180 mg/dl [7.8 to<10.0 mmol/l]), or high (>or =180 mg/dl [>or =10 mmol/l]) fasting plasma glucose (FPG) levels at diagnosis. Individuals who presented with and without diabetic symptoms were also compared.', 'Fewer people with FPG in the lowest category had retinopathy, ab

In [16]:
full_results = retrieve_and_generate(query, k=10)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [17]:
# building a front end interface using gradio

!pip install gradio


Collecting gradio
  Downloading gradio-5.22.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

In [18]:
import gradio as gr


In [21]:
# Define the Chatbot Function

'''
1 Takes the user query as input.
2️ Runs the RAG pipeline:

Retrieves relevant contexts from FAISS.
Generates answers using GPT-2.
3️ Displays:
The retrieved context.
The GPT-2 generated answer.
'''



"""
    Chatbot function to handle user queries:
    - Retrieves context using FAISS
    - Generates answers using GPT-2
    - Displays the context and generated answer
"""

'''
1 The function:
Accepts a user query.
Runs the RAG pipeline.
Retrieves context and generates an answer.
2️ Displays the:
Question
Context
GPT-2 generated answer
Similarity distance
'''
def ai_doctor_chatbot(query):

    # Retrieve and generate answer
    results = retrieve_and_generate(query, k=3)

    # Prepare chatbot output
    output = ""

    for i, res in enumerate(results):
        output += f"🔹 **Result {i+1}**\n"
        output += f"**Question:** {res['question']}\n"
        output += f"**Context:** {res['context']}\n"
        output += f"**Generated Answer:** {res['answer']}\n"
        output += f"**Distance:** {res['distance']:.4f}\n"
        output += "=" * 100 + "\n"

    return output


In [22]:
# Creating the Gradio Interface

# Create the Gradio interface
iface = gr.Interface(
    fn=ai_doctor_chatbot,
    inputs=gr.Textbox(label="Ask your medical question:"),
    outputs="text",
    title="🩺 Ask Your AI Doctor",
    description="Enter a medical question and get accurate answers based on the PubMedQA dataset. Powered by RAG with FAISS + GPT-2."
)

# Launch the interface
iface.launch(share=True)



Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2556c9ecee74127f12.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


