In [None]:
pip install transformers sentence-transformers gradio faiss-cpu pandas numpy

Collecting gradio
  Downloading gradio-5.7.1-py3-none-any.whl.metadata (16 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.5.0 (from gradio)
  Downloading gradio_client-1.5.0-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart==0.0.12 (from gradio)
  Downloading python_multipart-0.0.12-py3-none-any.whl.metadat

In [None]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import gradio as gr

  from tqdm.autonotebook import tqdm, trange


In [None]:
# Read the question/answer dataset from CSV into a DataFrame
file_path = "train.csv"  # Replace with the actual dataset file path
data = pd.read_csv(file_path)

# Preview the first five rows to confirm the expected columns are present
print(data.head(5))

             qtype                                           Question  \
0   susceptibility  Who is at risk for Lymphocytic Choriomeningiti...   
1         symptoms  What are the symptoms of Lymphocytic Choriomen...   
2   susceptibility  Who is at risk for Lymphocytic Choriomeningiti...   
3  exams and tests  How to diagnose Lymphocytic Choriomeningitis (...   
4        treatment  What are the treatments for Lymphocytic Chorio...   

                                              Answer  
0  LCMV infections can occur after exposure to fr...  
1  LCMV is most commonly recognized as causing ne...  
2  Individuals of all ages who come into contact ...  
3  During the first phase of the disease, the mos...  
4  Aseptic meningitis, encephalitis, or meningoen...  


In [None]:
# Preprocessing
# Rename *before* selecting so this cell can be re-run safely: on a second run
# the frame already carries the lower-case names, `rename` is then a no-op, and
# the selection below still succeeds (selecting 'Question'/'Answer' first would
# raise KeyError on re-execution).
data = (
    data.rename(columns={'Question': 'question', 'Answer': 'answer'})  # Rename for simplicity
    .loc[:, ['qtype', 'question', 'answer']]
    .dropna()
    # Keep the row labels equal to the list positions built below.
    .reset_index(drop=True)
)

# Convert to lists (position i in each list refers to the same FAQ entry)
qtypes = data['qtype'].tolist()
questions = data['question'].tolist()
answers = data['answer'].tolist()

print(f"Dataset contains {len(questions)} questions across {data['qtype'].nunique()} categories.")

Dataset contains 16407 questions across 16 categories.


In [None]:
from sentence_transformers import SentenceTransformer

# Name of the pretrained sentence encoder; the same `model` object is used to
# embed the stored questions and, later, each incoming query.
embedding_model_name = 'all-MiniLM-L6-v2'
model = SentenceTransformer(embedding_model_name)  # Optimized for semantic search

In [None]:
# Embed every question in a single call; the progress bar tracks the batches.
question_embeddings = model.encode(questions, show_progress_bar=True)

# Write the embedding matrix to disk for future use
embeddings_file = "question_embeddings.npy"
np.save(embeddings_file, question_embeddings)
print("Embeddings created and saved.")

Batches:   0%|          | 0/513 [00:00<?, ?it/s]

Embeddings created and saved.


In [None]:


# Initialize FAISS index
# FAISS only accepts C-contiguous float32 matrices. Coerce once here: this is a
# no-op (no copy) when the embeddings already have that layout, and it protects
# against float64 input, e.g. embeddings produced by a different pipeline.
embedding_matrix = np.ascontiguousarray(question_embeddings, dtype=np.float32)
dimension = embedding_matrix.shape[1]  # Embedding vector dimension
index = faiss.IndexFlatL2(dimension)  # Exact (brute-force) L2 search

# Add embeddings to the FAISS index
index.add(embedding_matrix)

# search_faq() maps FAISS row ids straight onto the `questions` list, so the
# two must stay the same length.
assert index.ntotal == len(questions), "FAISS index and question list are out of sync"
print(f"FAISS index contains {index.ntotal} items.")

FAISS index contains 16407 items.


In [None]:
def search_faq(query, top_k=3):
    """Return the FAQ entries whose questions are nearest to ``query``.

    The query is embedded with the module-level ``model`` and looked up in the
    module-level FAISS ``index``; row ids returned by FAISS are used as
    positions into the parallel ``qtypes`` / ``questions`` / ``answers`` lists.

    Args:
        query: Free-text question. ``None``, empty, or whitespace-only input
            yields an empty result instead of being embedded.
        top_k: Maximum number of entries to return. Values below 1 yield an
            empty result.

    Returns:
        A list (nearest first, at most ``top_k`` long) of dicts with the keys
        ``'qtype'``, ``'question'`` and ``'answer'``.
    """
    # Nothing meaningful to embed, and FAISS requires k >= 1.
    if not query or not query.strip() or top_k < 1:
        return []

    # Never request more neighbours than the index holds.
    k = min(top_k, index.ntotal)
    if k < 1:
        return []

    # FAISS expects a float32 matrix of shape (n_queries, dimension).
    query_embedding = np.asarray(model.encode([query]), dtype=np.float32)
    _, indices = index.search(query_embedding, k)

    return [
        {
            'qtype': qtypes[idx],
            'question': questions[idx],
            'answer': answers[idx],
        }
        for idx in indices[0]
        # FAISS pads missing neighbours with -1; without this check a -1 would
        # silently select the *last* FAQ entry.
        if idx >= 0
    ]


In [None]:
# Smoke-test the retrieval function with a sample question.
query = "What are the symptoms of flu?"
results = search_faq(query)

# One print per hit; the trailing "\n" plus print's own newline leaves a blank
# line between consecutive hits.
for result in results:
    print(
        f"Category: {result['qtype']}\n"
        f"Question: {result['question']}\n"
        f"Answer: {result['answer']}\n"
    )

Category: symptoms
Question: What are the symptoms of Q Fever ?
Answer: Q fever can cause acute or chronic illness in humans, who usually acquire infection after contact with infected animals or exposure to contaminated environments. The acute symptoms caused by infection with Coxiella burnetii usually develop within 2-3 weeks of exposure, although as many as half of humans infected withC. burnetii do not show symptoms. 
 
The following is a list of symptoms commonly seen with acute Q fever. However, it is important to note that the combination of symptoms varies greatly from person to person. 
  
  - high fevers (up to 104-105°F)  
  - severe headache  
  - general malaise  
  - myalgia  
  - chills and/or sweats  
  - non-productive cough  
  - nausea  
  - vomiting  
  - diarrhea  
  - abdominal pain  
  - chest pain  
  
 
Although most persons with acute Q fever infection recover, others may experience serious illness with complications that may include pneumonia, granulomatous he

In [None]:
def chatbot(query):
    """Answer ``query`` with the best-matching FAQ entry.

    Looks the query up via ``search_faq`` and formats the first hit as three
    Markdown-style labelled lines (category, question, answer). When the
    search returns nothing, a fixed apology string is returned instead.
    """
    matches = search_faq(query)
    if not matches:
        return "Sorry, I couldn't find an answer to your question."

    best = matches[0]
    lines = [
        f"**Category:** {best['qtype']}",
        f"**Question:** {best['question']}",
        f"**Answer:** {best['answer']}",
    ]
    return "\n".join(lines)


In [None]:
import gradio as gr

# chatbot() returns Markdown-style text (**bold** labels separated by single
# newlines). A plain "text" output would show the asterisks literally, so the
# response is rendered with a Markdown component instead; `line_breaks=True`
# keeps each single newline as a visible line break.
interface = gr.Interface(
    fn=chatbot,
    inputs=gr.Textbox(lines=2, placeholder="Enter your medical question..."),
    outputs=gr.Markdown(line_breaks=True),
    title="Healthcare Chatbot",
    description="Ask any medical question, and get instant answers with category-specific context!"
)


In [None]:
# Start the web UI for the `interface` object built in the previous cell.
interface.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://5aa42b8ef41ea0b4d6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




SyntaxError: invalid syntax (2049942660.py, line 1)

In [None]:
def chatbot_with_category(query, category=None, candidates=50):
    """Answer ``query``, optionally restricted to one FAQ category.

    Args:
        query: Free-text question passed on to ``search_faq``.
        category: Category name to restrict the answer to (compared
            case-insensitively against each hit's ``'qtype'``). ``None`` or an
            empty string means "no restriction".
        candidates: How many nearest neighbours to retrieve *before* the
            category filter is applied. Filtering only the default top three
            hits would usually leave nothing, because the closest questions
            often belong to other categories.

    Returns:
        The best remaining hit formatted as three Markdown-style labelled
        lines, or an apology string when nothing matches.
    """
    if category:
        wanted = category.lower()
        # Search a wider pool, then keep only hits from the requested category.
        pool = search_faq(query, top_k=max(candidates, 1))
        results = [res for res in pool if res['qtype'].lower() == wanted]
    else:
        results = search_faq(query)

    if not results:
        if category:
            return f"Sorry, no results found in the '{category}' category."
        # No category was requested, so do not mention a 'None' category.
        return "Sorry, I couldn't find an answer to your question."

    top_result = results[0]
    return (
        f"**Category:** {top_result['qtype']}\n"
        f"**Question:** {top_result['question']}\n"
        f"**Answer:** {top_result['answer']}"
    )

In [None]:
# Category-aware UI. Notes:
# - `chatbot_with_category` is passed directly; a lambda that only forwards its
#   arguments adds nothing.
# - The dropdown choices are derived from the dataset's own categories
#   (`qtypes`) so every option can actually match; leaving the dropdown empty
#   passes None, which means "no category filter".
# - The response contains Markdown (**bold** labels), so it is rendered with a
#   Markdown component; `line_breaks=True` keeps single newlines as line breaks.
interface = gr.Interface(
    fn=chatbot_with_category,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your medical question..."),
        gr.Dropdown(choices=sorted(set(qtypes)), label="Category (optional)")
    ],
    outputs=gr.Markdown(line_breaks=True),
    title="Healthcare Chatbot",
    description="Ask any medical question, and filter by category if desired!"
)

In [None]:
# Start the web UI for the category-aware `interface` built in the previous cell.
interface.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1712034a2820e6cb1b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


