In [1]:
# Import necessary libraries
import json
import os
import re
from pathlib import Path

import pandas as pd

In [2]:
# Load the three CSV tables used by the chatbot: the MedQuAD question/answer
# pairs, the disease/symptom table and the disease/precaution table.
#
# The directory is defined once so the notebook can run on another machine:
# set the MEDICAL_CHATBOT_DATA_DIR environment variable to point somewhere
# else; when it is unset the original local path is used unchanged.
DATA_DIR = Path(
    os.environ.get(
        "MEDICAL_CHATBOT_DATA_DIR",
        "/Users/manikantang/Documents/Medical Chatbot",
    )
)

medquad_df = pd.read_csv(DATA_DIR / "medquad.csv")
disease_df = pd.read_csv(DATA_DIR / "DiseaseAndSymptoms.csv")
precaution_df = pd.read_csv(DATA_DIR / "Disease precaution.csv")

In [3]:
# Normalise every column header to snake_case: surrounding whitespace removed,
# lower-cased, inner spaces replaced by underscores.
def _to_snake_case(label):
    """Return ``label`` stripped, lower-cased and with spaces turned into underscores."""
    return label.strip().lower().replace(" ", "_")


medquad_df.columns = list(map(_to_snake_case, medquad_df.columns))
disease_df.columns = list(map(_to_snake_case, disease_df.columns))
precaution_df.columns = list(map(_to_snake_case, precaution_df.columns))

In [4]:
# The column-cleaning cell has already lower-cased every header, so the
# MedQuAD text columns have to be looked up by their lower-case names;
# mixed-case keys such as "Q_Text" can never match and the rename would
# silently do nothing. Reassigning (instead of inplace=True) keeps the cell
# safe to re-run.
medquad_df = medquad_df.rename(columns={"q_text": "question", "a_text": "answer"})

In [5]:
#Define basic keyword matching function for MedQuAD
def search_medquad(query, df=None, limit=3):
    """Return the first MedQuAD rows whose question contains ``query``.

    The query is matched as literal text, case-insensitively. It is NOT
    interpreted as a regular expression, so user input containing characters
    such as ``?``, ``(`` or ``[`` can neither raise ``re.error`` nor change
    the meaning of the search.

    Parameters
    ----------
    query : str
        Free text typed by the user. Surrounding whitespace is ignored.
    df : pandas.DataFrame, optional
        Table with ``question`` and ``answer`` columns. Defaults to the
        notebook-level ``medquad_df``.
    limit : int, optional
        Maximum number of rows to return (default 3).

    Returns
    -------
    pandas.DataFrame
        The ``question`` and ``answer`` columns of the matching rows, in
        table order; empty when nothing matches or the query is blank.
    """
    if df is None:
        df = medquad_df

    needle = str(query).strip()
    if not needle:
        # A blank pattern would match every row; return no rows instead of
        # an arbitrary slice of the table.
        return df[['question', 'answer']].head(0)

    # na=False: rows with a missing question count as "no match" rather than
    # producing an NA mask, which boolean indexing rejects.
    mask = df['question'].str.contains(needle, case=False, regex=False, na=False)
    return df.loc[mask, ['question', 'answer']].head(limit)

In [6]:
def match_disease(symptom_query, disease_data=None, precaution_data=None, top_n=3):
    """Rank diseases by how many words of the query appear in their symptoms.

    The query is split on whitespace; each distinct word is tested as a
    case-insensitive substring of the row's symptom text (the non-missing
    values of every column whose name contains ``symptom``, joined by
    spaces). A disease's score is the highest number of matching words over
    all of its rows. Diseases with at least one match are returned by
    descending score; ties keep the order in which the diseases first
    appear in the table, so the result is the same on every run.

    Parameters
    ----------
    symptom_query : str
        Free-text symptom description.
    disease_data : pandas.DataFrame, optional
        Table with a ``disease`` column and one or more ``*symptom*``
        columns. Defaults to the notebook-level ``disease_df``.
    precaution_data : pandas.DataFrame, optional
        Table with a ``disease`` column; every other column is treated as a
        precaution. Defaults to the notebook-level ``precaution_df``.
    top_n : int, optional
        Maximum number of diseases to return (default 3).

    Returns
    -------
    list of dict
        One ``{"Disease": name, "Precautions": [str, ...]}`` entry per
        disease. ``Precautions`` is ``["No data"]`` when the precaution
        table has no row for that disease.
    """
    diseases = disease_df if disease_data is None else disease_data
    precautions = precaution_df if precaution_data is None else precaution_data

    # Distinct words only, so repeating a word cannot inflate a score.
    tokens = list(dict.fromkeys(str(symptom_query).lower().split()))
    if not tokens:
        return []

    symptom_cols = [col for col in diseases.columns if 'symptom' in col.lower()]

    # disease -> best hit count. A dict keeps first-insertion order, which
    # together with the stable sort below gives deterministic tie-breaking.
    scores = {}
    records = diseases[['disease', *symptom_cols]].itertuples(index=False, name=None)
    for disease, *values in records:
        if pd.isna(disease):
            continue
        symptoms = ' '.join(str(value) for value in values if pd.notna(value)).lower()
        hits = sum(token in symptoms for token in tokens)
        if hits > scores.get(disease, 0):
            scores[disease] = hits

    ranked = sorted(scores, key=scores.get, reverse=True)[:top_n]

    # Compare names ignoring case and surrounding whitespace so that small
    # formatting differences between the two tables do not hide a match.
    precaution_keys = precautions['disease'].astype(str).str.strip().str.lower()

    output = []
    for disease in ranked:
        found = precautions[precaution_keys == str(disease).strip().lower()]
        if found.empty:
            advice = ["No data"]
        else:
            # Every column except the disease name is a precaution; str()
            # guarantees callers can safely ", ".join(...) the result.
            advice = [str(item) for item in found.iloc[0].drop(labels='disease').dropna()]
        output.append({"Disease": disease, "Precautions": advice})

    return output


In [7]:
# Ask the user for a free-text question or symptom description
greeting = "Hi! Please describe your issue or ask a question: "
user_input = input(greeting)

In [8]:
#Basic decision routing
# Input containing a question word goes to the MedQuAD Q&A search; anything
# else is treated as a symptom description.
# The question words are compared against whole words of the input: a plain
# substring test would send "cancer" (contains "can") or "shower" (contains
# "how") down the Q&A branch by mistake.
QUESTION_WORDS = {'what', 'why', 'how', 'can', 'does'}
query_words = set(re.findall(r"[a-z']+", user_input.lower()))

if QUESTION_WORDS & query_words:
    print("\n--- MedQuAD Answer(s) ---")
    answers = search_medquad(user_input)
    if answers.empty:
        print("No exact match found in MedQuAD.")
    else:
        for _, row in answers.iterrows():
            print(f"Q: {row['question']}\nA: {row['answer']}\n")
else:
    print("\n--- Symptom-Based Disease Match ---")
    matches = match_disease(user_input)
    if not matches:
        print("Could you please rephrase or provide a more detailed explanation of the symptoms.")
    else:
        for m in matches:
            print(f"\nPossible Disease: {m['Disease']}")
            # map(str, ...) keeps the join from failing if a precaution
            # value was not read from the CSV as text.
            print("Precautions:", ", ".join(map(str, m["Precautions"])))


--- Symptom-Based Disease Match ---

Possible Disease: Hyperthyroidism
Precautions: eat healthy, massage, use lemon balm, take radioactive iodine treatment

Possible Disease: Migraine
Precautions: meditation, reduce stress, use poloroid glasses in sun, consult doctor

Possible Disease: hepatitis A
Precautions: Consult nearest hospital, wash hands through, avoid fatty spicy food, medication
