In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import matplotlib as plt
import joblib

# Load the dataset
df = pd.read_csv("medibot_realistic_dataset.csv")

# Separate features and label
X = df.drop("disease", axis=1)
y = df["disease"]

# Encode target labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Build the model (RandomForest)
model = RandomForestClassifier(
    n_estimators=500,
    max_depth=None,
    n_jobs=-1,
    class_weight="balanced",
    random_state=42
)

model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print("Accuracy:", (accuracy_score(y_test, y_pred))*100)
print(classification_report(y_test, y_pred, target_names=le.classes_))

#Save model + label encoder + symptom columns
joblib.dump(model, "medibot_model.pkl")
joblib.dump(le, "medibot_label_encoder.pkl")
joblib.dump(list(X.columns), "medibot_symptom_list.pkl")

print("Model saved successfully!")


Accuracy: 0.9824561403508771
                  precision    recall  f1-score   support

         Acidity       1.00      1.00      1.00        24
         Allergy       1.00      1.00      1.00        24
      Bronchitis       1.00      0.96      0.98        24
           COVID       1.00      1.00      1.00        24
     Common Cold       1.00      1.00      1.00        24
          Dengue       1.00      1.00      1.00        24
          Eczema       0.96      1.00      0.98        24
Fatigue Syndrome       1.00      1.00      1.00        24
             Flu       1.00      0.96      0.98        24
  Food Poisoning       0.92      1.00      0.96        24
Fungal Infection       1.00      0.96      0.98        24
       Gastritis       1.00      0.92      0.96        24
 Gastroenteritis       1.00      0.92      0.96        24
         Malaria       0.96      0.96      0.96        24
        Migraine       0.92      1.00      0.96        24
       Pneumonia       0.96      1.00     

In [52]:
import re
import joblib
import numpy as np

# Load trained model + artifacts
model = joblib.load("medibot_model.pkl")
label_encoder = joblib.load("medibot_label_encoder.pkl")
symptom_list = joblib.load("medibot_symptom_list.pkl")

symptom_set = set(symptom_list)


In [53]:
def clean_text(t: str) -> str:
    t = t.lower()
    t = re.sub(r"[^a-z0-9\s]", " ", t)   # remove punctuation
    t = re.sub(r"\s+", " ", t).strip()
    return t


In [54]:
# Map user phrases to one or more canonical symptom feature names
symptom_synonyms = {
    # Common cold / flu / viral
    "cold": ["runny_nose", "cough", "congestion", "sneezing"],
    "common cold": ["runny_nose", "cough", "congestion", "sneezing"],
    "flu": ["high_fever", "body_pain", "fatigue", "cough", "chills"],
    "fever": ["fever"],
    "high fever": ["high_fever"],
    "low fever": ["mild_fever"],
    "mild fever": ["mild_fever"],
    "temperature": ["fever"],
    "shivering": ["chills"],
    "chills": ["chills"],

    # Head-related
    "headache": ["headache"],
    "migraine": ["headache", "light_sensitivity"],

    # Nose / throat
    "runny nose": ["runny_nose"],
    "running nose": ["runny_nose"],
    "blocked nose": ["congestion"],
    "stuffy nose": ["congestion"],
    "sore throat": ["sore_throat"],
    "throat pain": ["sore_throat"],

    # Cough
    "cough": ["cough"],
    "dry cough": ["dry_cough"],
    "wet cough": ["cough"],

    # Chest
    "chest pain": ["chest_pain"],
    "chest discomfort": ["chest_discomfort"],

    # Stomach / digestion
    "stomach pain": ["stomach_pain"],
    "abdominal pain": ["stomach_pain"],
    "stomach cramps": ["stomach_cramps"],
    "cramps": ["stomach_cramps"],
    "acidity": ["heartburn", "stomach_burning"],
    "gas": ["bloating"],
    "bloating": ["bloating"],
    "vomiting": ["vomiting"],
    "nausea": ["nausea"],
    "loose motion": ["diarrhea"],
    "diarrhea": ["diarrhea"],
    "diarrhoea": ["diarrhea"],

    # Fatigue / body pain
    "tired": ["fatigue"],
    "tiredness": ["fatigue"],
    "fatigue": ["fatigue"],
    "body pain": ["body_pain"],
    "body ache": ["body_pain"],
    "weakness": ["low_energy"],
    "low energy": ["low_energy"],

    # Joints
    "joint pain": ["joint_pain"],

    # Skin / allergy
    "rash": ["rash", "skin_rash"],
    "skin rash": ["skin_rash"],
    "itching": ["itching"],
    "itchy skin": ["itching"],
    "allergy": ["sneezing", "runny_nose", "itchy_eyes", "skin_rash"],
    "red patches": ["rash"],
    "redness": ["redness"],
    "burning sensation": ["burning"],

    # Breathing / lungs
    "breathless": ["chest_discomfort"],
    "breathing problem": ["chest_discomfort"],

    # COVID-like
    "loss of smell": ["loss_of_smell"],
    "loss of taste": ["loss_of_taste"],
}


In [55]:
def extract_symptoms_from_text(text: str):
    text_clean = clean_text(text)
    symptoms_found = set()

    # 1) Match synonyms
    for phrase, canonical in symptom_synonyms.items():
        if phrase in text_clean:
            if isinstance(canonical, list):
                for s in canonical:
                    if s in symptom_set:
                        symptoms_found.add(s)
            else:
                if canonical in symptom_set:
                    symptoms_found.add(canonical)

    # 2) Direct match against known symptom names (like "vomiting", "fatigue")
    for sym in symptom_list:
        phrase = sym.replace("_", " ")
        if phrase in text_clean:
            symptoms_found.add(sym)

    return list(symptoms_found)


In [56]:
extract_symptoms_from_text("I am having cold for 5 days and low fever")
# expect something like: ['runny_nose', 'cough', 'congestion', 'sneezing', 'mild_fever']


['sneezing', 'fever', 'cough', 'runny_nose', 'congestion', 'mild_fever']

In [57]:
def predict_disease_from_symptoms(symptoms_input):
    """
    symptoms_input: list of canonical symptom feature names (strings)
    """
    # vector of 0/1 for all symptoms in the same order as training
    x = np.zeros(len(symptom_list), dtype=int)
    idx_map = {sym: i for i, sym in enumerate(symptom_list)}

    for s in symptoms_input:
        if s in idx_map:
            x[idx_map[s]] = 1

    x = x.reshape(1, -1)
    pred_label = model.predict(x)[0]
    disease = label_encoder.inverse_transform([pred_label])[0]
    return disease


In [58]:
disease_descriptions = {
    "Common Cold": "A mild viral infection of the upper respiratory tract causing sneezing, runny nose, and sore throat.",
    "Flu": "An infectious respiratory illness with high fever, body aches, and fatigue, usually more severe than common cold.",
    "Viral Fever": "A general term for fever caused by a viral infection, often with headache and tiredness.",
    "Sinusitis": "Inflammation of the sinuses causing headache, facial pain, and nasal congestion.",
    "Migraine": "A type of headache often with throbbing pain, nausea, and sensitivity to light.",
    "COVID": "A viral respiratory infection which may present with fever, cough, fatigue, and loss of smell or taste.",
    "Gastritis": "Inflammation of the stomach lining leading to stomach pain, nausea, or bloating.",
    "Food Poisoning": "Illness caused by contaminated food, leading to vomiting, diarrhea, and stomach cramps.",
    "Gastroenteritis": "Infection of the stomach and intestines causing diarrhea, vomiting, and fever.",
    "Acidity": "Excess acid in the stomach leading to burning sensation, heartburn, or regurgitation.",
    "Allergy": "Immune reaction to a substance, causing sneezing, runny nose, itching, or skin rash.",
    "Fungal Infection": "Skin infection due to fungus causing itching, redness, and rash.",
    "Eczema": "A chronic skin condition with dry, itchy, and inflamed patches.",
    "Tension Headache": "Headache related to stress or muscle tension in the neck and scalp.",
    "Fatigue Syndrome": "Persistent tiredness and low energy not relieved by rest.",
    "Dengue": "Mosquito-borne viral illness with high fever, joint pain, and sometimes rash.",
    "Malaria": "Mosquito-borne parasitic infection with high fever, chills, and sweating.",
    "Bronchitis": "Inflammation of the airways causing cough with mucus and chest discomfort.",
    "Pneumonia": "Infection of the lungs leading to cough, fever, chest pain, and difficulty breathing."
}

disease_precautions = {
    "Common Cold": [
        "Rest and stay hydrated",
        "Use warm fluids like soup or tea",
        "Use steam inhalation for congestion",
        "Avoid cold drinks and dust exposure"
    ],
    "Flu": [
        "Take adequate rest",
        "Drink plenty of fluids",
        "Use paracetamol for fever as advised by a doctor",
        "Avoid close contact with others to prevent spread"
    ],
    "Viral Fever": [
        "Monitor temperature regularly",
        "Stay hydrated with water and ORS",
        "Avoid heavy or oily meals",
        "Consult a doctor if fever persists more than 3 days"
    ],
    "Sinusitis": [
        "Use steam inhalation",
        "Use warm compress on the face",
        "Avoid cold air and strong perfumes",
        "Consult a doctor if pain is severe"
    ],
    "Migraine": [
        "Rest in a dark, quiet room",
        "Avoid known triggers like lack of sleep or strong smells",
        "Use prescribed migraine medication",
        "Consult a doctor if headaches are frequent"
    ],
    "COVID": [
        "Isolate yourself and wear a mask",
        "Monitor oxygen level and temperature",
        "Stay hydrated and rest",
        "Get tested and consult a doctor if symptoms worsen"
    ],
    "Gastritis": [
        "Avoid spicy and oily food",
        "Eat smaller, frequent meals",
        "Avoid alcohol and smoking",
        "Consult a doctor if pain is persistent"
    ],
    "Food Poisoning": [
        "Drink ORS to prevent dehydration",
        "Avoid solid food until vomiting stops",
        "Do not take medicines without medical advice",
        "Consult a doctor if blood appears in stool or vomit"
    ],
    "Gastroenteritis": [
        "Drink plenty of fluids",
        "Eat light, bland food",
        "Avoid milk and dairy for a while",
        "Consult a doctor if diarrhea is severe"
    ],
    "Acidity": [
        "Avoid spicy, fried food and late-night eating",
        "Do not lie down immediately after meals",
        "Limit tea, coffee, and carbonated drinks",
        "Consult a doctor if symptoms are frequent"
    ],
    "Allergy": [
        "Avoid known allergens (dust, pollen, pets etc.)",
        "Use mask in dusty environments",
        "Use anti-allergic medication if prescribed",
        "Consult doctor if breathing difficulty occurs"
    ],
    "Fungal Infection": [
        "Keep affected area clean and dry",
        "Avoid tight clothing",
        "Use antifungal cream as prescribed",
        "Do not scratch the area"
    ],
    "Eczema": [
        "Moisturize skin regularly",
        "Avoid harsh soaps and hot water",
        "Do not scratch the skin",
        "Consult a dermatologist for proper treatment"
    ],
    "Tension Headache": [
        "Take breaks from continuous screen time",
        "Practice relaxation and stress management",
        "Maintain good posture",
        "Consult doctor if headaches persist"
    ],
    "Fatigue Syndrome": [
        "Maintain regular sleep schedule",
        "Include light exercise in routine",
        "Eat balanced, nutritious food",
        "Consult doctor if fatigue is long-lasting"
    ],
    "Dengue": [
        "Do not take aspirin or ibuprofen",
        "Drink plenty of fluids and ORS",
        "Monitor platelets and fever",
        "Consult doctor or visit hospital immediately"
    ],
    "Malaria": [
        "Consult doctor for blood test and treatment",
        "Sleep under mosquito net",
        "Complete full course of anti-malarial drugs",
        "Avoid mosquito bites"
    ],
    "Bronchitis": [
        "Avoid smoking and polluted air",
        "Drink warm fluids",
        "Use cough medicines if prescribed",
        "Consult doctor if cough persists more than 2 weeks"
    ],
    "Pneumonia": [
        "Consult doctor immediately",
        "Do not ignore chest pain or breathing difficulty",
        "Complete prescribed antibiotic course",
        "Take adequate rest"
    ]
}


In [59]:
def medibot_reply(user_text: str) -> str:
    # 1) Extract symptoms
    symptoms = extract_symptoms_from_text(user_text)

    if not symptoms:
        return (
            "I‚Äôm sorry, I couldn‚Äôt clearly understand your symptoms from that.\n"
            "Could you describe them in a bit more detail? For example:\n"
            "\"I have headache and mild fever since 3 days.\""
        )

    # 2) Predict disease
    disease = predict_disease_from_symptoms(symptoms)

    # 3) Get description & precautions
    description = disease_descriptions.get(
        disease,
        "No detailed description available for this condition."
    )
    precautions = disease_precautions.get(disease, [])

    # 4) Build human-like reply
    resp = []
    resp.append(f"ü©∫ I understood these symptoms from your message: {', '.join(symptoms)}")
    resp.append("")
    resp.append(f"üìå Based on this, you may be suffering from: **{disease}**")
    resp.append("")
    resp.append("‚ÑπÔ∏è About this condition:")
    resp.append(description)
    resp.append("")

    if precautions:
        resp.append("‚úÖ Suggested precautions and next steps:")
        for p in precautions:
            resp.append(f"- {p}")
    else:
        resp.append("‚úÖ General advice: rest well, stay hydrated, and monitor your symptoms.")

    resp.append("")
    resp.append(
        "‚ö†Ô∏è This is not a confirmed medical diagnosis.\n"
        "If your symptoms are severe, worsening, or you feel very unwell, "
        "please consult a qualified doctor or visit a hospital immediately."
    )

    return "\n".join(resp)


In [62]:

print(medibot_reply("I am feeling very tired with body pain and fever"))


ü©∫ I understood these symptoms from your message: body_pain, fatigue, fever

üìå Based on this, you may be suffering from: **Fatigue Syndrome**

‚ÑπÔ∏è About this condition:
Persistent tiredness and low energy not relieved by rest.

‚úÖ Suggested precautions and next steps:
- Maintain regular sleep schedule
- Include light exercise in routine
- Eat balanced, nutritious food
- Consult doctor if fatigue is long-lasting

‚ö†Ô∏è This is not a confirmed medical diagnosis.
If your symptoms are severe, worsening, or you feel very unwell, please consult a qualified doctor or visit a hospital immediately.


