In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")


In [3]:
df = pd.read_csv('Training.csv')


In [4]:
df.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection


In [5]:
df.shape

(4920, 133)

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4920 entries, 0 to 4919
Columns: 133 entries, itching to prognosis
dtypes: int64(132), object(1)
memory usage: 5.0+ MB


In [7]:
df.isnull().sum()

itching                 0
skin_rash               0
nodal_skin_eruptions    0
continuous_sneezing     0
shivering               0
                       ..
inflammatory_nails      0
blister                 0
red_sore_around_nose    0
yellow_crust_ooze       0
prognosis               0
Length: 133, dtype: int64

In [8]:
df['prognosis'].value_counts().head()

prognosis
Fungal infection       120
Allergy                120
GERD                   120
Chronic cholestasis    120
Drug Reaction          120
Name: count, dtype: int64

In [9]:
X = df.drop('prognosis', axis=1)
y = df['prognosis']

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
clf = DecisionTreeClassifier(criterion='entropy', random_state=42)


clf.fit(X_train, y_train)




0,1,2
,criterion,'entropy'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,42
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [12]:

y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)


In [13]:
accuracy

1.0

In [14]:
description_df = pd.read_csv("description.csv")
precautions_df = pd.read_csv("precautions_df.csv")
medications_df = pd.read_csv("medications.csv")
diets_df = pd.read_csv("diets.csv")
workout_df = pd.read_csv("workout_df.csv")

In [15]:
medications_df.head(
)

Unnamed: 0,Disease,Medication
0,Fungal infection,"['Antifungal Cream', 'Fluconazole', 'Terbinafi..."
1,Allergy,"['Antihistamines', 'Decongestants', 'Epinephri..."
2,GERD,"['Proton Pump Inhibitors (PPIs)', 'H2 Blockers..."
3,Chronic cholestasis,"['Ursodeoxycholic acid', 'Cholestyramine', 'Me..."
4,Drug Reaction,"['Antihistamines', 'Epinephrine', 'Corticoster..."


In [16]:
precautions_df.head()

Unnamed: 0.1,Unnamed: 0,Disease,Precaution_1,Precaution_2,Precaution_3,Precaution_4
0,0,Drug Reaction,stop irritation,consult nearest hospital,stop taking drug,follow up
1,1,Malaria,Consult nearest hospital,avoid oily food,avoid non veg food,keep mosquitos out
2,2,Allergy,apply calamine,cover area with bandage,,use ice to compress itching
3,3,Hypothyroidism,reduce stress,exercise,eat healthy,get proper sleep
4,4,Psoriasis,wash hands with warm soapy water,stop bleeding using pressure,consult doctor,salt baths


In [17]:
import numpy as np

# --- 1. BUILD DICTIONARIES FOR FAST LOOKUP ---
# This converts the CSV data into Python Dictionaries {Key: Value}
# This is much faster than searching the dataframe every single time.

desc_dict = dict(zip(description_df['Disease'], description_df['Description']))
precaution_dict = dict(zip(precautions_df['Disease'], precautions_df[['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']].values.tolist()))
medication_dict = dict(zip(medications_df['Disease'], medications_df['Medication']))
diet_dict = dict(zip(diets_df['Disease'], diets_df['Diet']))
workout_dict = dict(zip(workout_df['disease'], workout_df['workout']))

# Helper Function: Converts user symptoms into the 132-length number array
def predict_disease(user_symptoms):
    # Get all 132 symptom names from the training data columns
    # (Make sure 'X' is still in memory from Step 2!)
    all_symptoms = X.columns.tolist()

    # Create a list of 132 zeros
    input_vector = np.zeros(len(all_symptoms))

    # For each symptom the user has, find its index and set it to 1
    for sym in user_symptoms:
        # We strip whitespace and make sure it matches the column name exactly
        sym = sym.strip()
        if sym in all_symptoms:
            index = all_symptoms.index(sym)
            input_vector[index] = 1
        else:
            print(f"‚ö†Ô∏è Warning: Symptom '{sym}' not recognized by the model.")

    # Predict! (reshape(1, -1) tells sklearn this is a single sample)
    return clf.predict(input_vector.reshape(1, -1))[0]

# --- 2. THE MAIN SYSTEM ---

def doctor_bot(symptoms_list):
    # A. Get the Disease Prediction
    predicted_disease = predict_disease(symptoms_list)

    # B. Fetch Details from Dictionaries
    desc = desc_dict.get(predicted_disease, "No description available")
    precautions = precaution_dict.get(predicted_disease, ["Consult doctor"])
    meds = medication_dict.get(predicted_disease, "Consult doctor")
    diet = diet_dict.get(predicted_disease, "Consult doctor")
    workout = workout_dict.get(predicted_disease, "Consult doctor")

    # C. Print the Report
    print("\n" + "="*40)
    print(f"ü©∫  DIAGNOSIS: {predicted_disease.upper()}")
    print("="*40)
    print(f"üìÑ Description: {desc}")
    print(f"üíä Medication:  {meds}")
    print(f"ü•ó Diet:        {diet}")
    print(f"üèÉ Workout:     {workout}")
    print("-" * 40)
    print("üõ°Ô∏è  Precautions:")
    for i, p in enumerate(precautions, 1):
        print(f"   {i}. {p}")
    print("="*40 + "\n")

print("‚úÖ Doctor Bot Engine is Ready!")


‚úÖ Doctor Bot Engine is Ready!


In [18]:
doctor_bot(['itching', 'skin_rash', 'nodal_skin_eruptions'])


ü©∫  DIAGNOSIS: FUNGAL INFECTION
üìÑ Description: Fungal infection is a common skin condition caused by fungi.
üíä Medication:  ['Antifungal Cream', 'Fluconazole', 'Terbinafine', 'Clotrimazole', 'Ketoconazole']
ü•ó Diet:        ['Antifungal Diet', 'Probiotics', 'Garlic', 'Coconut oil', 'Turmeric']
üèÉ Workout:     Eat fruits and vegetables
----------------------------------------
üõ°Ô∏è  Precautions:
   1. bath twice
   2. use detol or neem in bathing water
   3. keep infected area dry
   4. use clean cloths



In [19]:
# Get the list of valid symptoms so we can help the user
valid_symptoms = X.columns.tolist()

print("\n" + "="*50)
print("ü§ñ  WELCOME TO DR. AI DIAGNOSTIC SYSTEM  ü§ñ")
print("="*50)
print("Type your symptoms separated by commas.")
print("Example: itching, skin_rash, chills")
print("Type 'quit' or 'exit' to stop.")
print("-" * 50)

while True:
    # 1. Get User Input
    user_input = input("\nüìù Enter Symptoms: ").lower()

    # 2. Check for Exit
    if user_input in ['quit', 'exit', 'bye']:
        print("üëã Stay healthy! Dr. AI signing off.")
        break

    # 3. Process the Input string into a list
    # We split by comma (',') and strip extra spaces
    # Example: " itching, skin_rash " -> ['itching', 'skin_rash']
    symptoms_list = [sym.strip() for sym in user_input.split(',')]

    # 4. Run the Doctor Bot!
    # We wrap it in a try-except block just in case of unexpected errors
    try:
        doctor_bot(symptoms_list)
    except Exception as e:
        print(f"‚ö†Ô∏è Error: {e}")



ü§ñ  WELCOME TO DR. AI DIAGNOSTIC SYSTEM  ü§ñ
Type your symptoms separated by commas.
Example: itching, skin_rash, chills
Type 'quit' or 'exit' to stop.
--------------------------------------------------

ü©∫  DIAGNOSIS: DIMORPHIC HEMMORHOIDS(PILES)
üìÑ Description: Dimorphic hemmorhoids(piles) is a condition characterized by swollen blood vessels in the rectum.
üíä Medication:  ['Nitroglycerin', 'Aspirin', 'Beta-blockers', 'Calcium channel blockers', 'Thrombolytic drugs']
ü•ó Diet:        ['Hemorrhoids Diet', 'High-Fiber Diet', 'Hydration', 'Warm baths', 'Stool softeners']
üèÉ Workout:     Follow medical recommendations
----------------------------------------
üõ°Ô∏è  Precautions:
   1. avoid fatty spicy food
   2. consume witch hazel
   3. warm bath with epsom salt
   4. consume alovera juice


ü©∫  DIAGNOSIS: DIMORPHIC HEMMORHOIDS(PILES)
üìÑ Description: Dimorphic hemmorhoids(piles) is a condition characterized by swollen blood vessels in the rectum.
üíä Medication:  ['N

In [23]:
# 1. Get all column names (symptoms)
all_symptoms = X.columns.tolist()

# 2. Let's check if 'cough' is in there
if 'fever' in all_symptoms:
    print("‚úÖ 'cough' is a valid symptom.")
else:
    print("‚ùå 'cough' is NOT found.")
    print("\nüîç Did you mean one of these?")
    # Let's search for similar words
    for s in all_symptoms:
        if 'fever' in s:
            print(f"   - {s}")


‚ùå 'cough' is NOT found.

üîç Did you mean one of these?
   - high_fever
   - mild_fever


In [24]:
# Get the list of valid symptoms
valid_symptoms = X.columns.tolist()

print("\n" + "="*50)
print("ü§ñ  DR. AI (DEBUG MODE)  ü§ñ")
print("="*50)

while True:
    user_input = input("\nüìù Enter Symptoms (comma-separated): ").lower()

    if user_input in ['quit', 'exit']:
        break

    # Split and clean user input
    raw_symptoms = [x.strip().replace(" ", "_") for x in user_input.split(',')]
    # Note: The dataset uses underscores like 'skin_rash', so we replace spaces with _

    final_symptoms = []
    print("\nüîç Verifying symptoms...")

    for sym in raw_symptoms:
        if sym in valid_symptoms:
            print(f"   ‚úÖ Found: '{sym}'")
            final_symptoms.append(sym)
        else:
            print(f"   ‚ùå Ignored: '{sym}' (Not in database)")

    if len(final_symptoms) > 0:
        print(f"\nüöÄ Predicting based on: {final_symptoms}")
        doctor_bot(final_symptoms)
    else:
        print("‚ö†Ô∏è No valid symptoms found. Please try again.")



ü§ñ  DR. AI (DEBUG MODE)  ü§ñ

üîç Verifying symptoms...
   ‚úÖ Found: 'high_fever'

üöÄ Predicting based on: ['high_fever']

ü©∫  DIAGNOSIS: AIDS
üìÑ Description: AIDS (Acquired Immunodeficiency Syndrome) is a disease caused by HIV that weakens the immune system.
üíä Medication:  ['Antiretroviral drugs', 'Protease inhibitors', 'Integrase inhibitors', 'Entry inhibitors', 'Fusion inhibitors']
ü•ó Diet:        ['Balanced Diet', 'Protein-rich foods', 'Fruits and vegetables', 'Whole grains', 'Healthy fats']
üèÉ Workout:     Consult a healthcare professional
----------------------------------------
üõ°Ô∏è  Precautions:
   1. avoid open cuts
   2. wear ppe if possible
   3. consult doctor
   4. follow up


üîç Verifying symptoms...
   ‚ùå Ignored: 'bye' (Not in database)
‚ö†Ô∏è No valid symptoms found. Please try again.

üîç Verifying symptoms...
   ‚ùå Ignored: 'bye' (Not in database)
‚ö†Ô∏è No valid symptoms found. Please try again.


In [22]:
# Get all symptoms and sort them
all_symptoms = sorted(X.columns.tolist())

print(f"üìã Total Symptoms Available: {len(all_symptoms)}\n")

# Print them in a nice grid
for i, sym in enumerate(all_symptoms):
    # Print in columns of 4 to make it readable
    print(f"{sym:<25}", end="")
    if (i + 1) % 4 == 0:
        print() # New line


üìã Total Symptoms Available: 132

abdominal_pain           abnormal_menstruation    acidity                  acute_liver_failure      
altered_sensorium        anxiety                  back_pain                belly_pain               
blackheads               bladder_discomfort       blister                  blood_in_sputum          
bloody_stool             blurred_and_distorted_visionbreathlessness           brittle_nails            
bruising                 burning_micturition      chest_pain               chills                   
cold_hands_and_feets     coma                     congestion               constipation             
continuous_feel_of_urine continuous_sneezing      cough                    cramps                   
dark_urine               dehydration              depression               diarrhoea                
dischromic _patches      distention_of_abdomen    dizziness                drying_and_tingling_lips 
enlarged_thyroid         excessive_hunger         ex

In [25]:
import difflib # This library helps find close matches

# 1. Get all valid symptoms
valid_symptoms = X.columns.tolist()

print("\n" + "="*60)
print("ü§ñ  DR. AI - SMART ASSISTANT MODE  ü§ñ")
print("="*60)
print("Type your symptoms (e.g., 'fever', 'pain').")
print("I will help you find the right medical term!")
print("-" * 60)

while True:
    user_input = input("\nüìù Describe a symptom: ").lower().strip()

    if user_input in ['quit', 'exit', 'done']:
        break

    # --- LOGIC 1: EXACT MATCH ---
    if user_input in valid_symptoms:
        print(f"   ‚úÖ Accepted: '{user_input}'")
        # In a real app, we would add this to a 'final_list' here
        continue

    # --- LOGIC 2: PARTIAL MATCH ("fever" -> "high_fever") ---
    # We look for symptoms that CONTAIN the user's word
    found_matches = [sym for sym in valid_symptoms if user_input in sym]

    # --- LOGIC 3: FUZZY MATCH ("fevr" -> "fever") ---
    # If no partial match, check for typos
    if not found_matches:
        found_matches = difflib.get_close_matches(user_input, valid_symptoms, n=3, cutoff=0.6)

    # --- DISPLAY RESULTS ---
    if found_matches:
        print(f"   ü§î I didn't find '{user_input}', but did you mean:")
        for i, match in enumerate(found_matches, 1):
            print(f"      {i}. {match}")
    else:
        print("   ‚ùå I couldn't find anything similar. Try a different word.")



ü§ñ  DR. AI - SMART ASSISTANT MODE  ü§ñ
Type your symptoms (e.g., 'fever', 'pain').
I will help you find the right medical term!
------------------------------------------------------------
   ‚ùå I couldn't find anything similar. Try a different word.
   ‚úÖ Accepted: 'cough'
   ‚ùå I couldn't find anything similar. Try a different word.
   ü§î I didn't find 'fever', but did you mean:
      1. high_fever
      2. mild_fever
   ü§î I didn't find 'cold', but did you mean:
      1. cold_hands_and_feets
   ‚úÖ Accepted: 'cold_hands_and_feets'
   ü§î I didn't find '', but did you mean:
      1. itching
      2. skin_rash
      3. nodal_skin_eruptions
      4. continuous_sneezing
      5. shivering
      6. chills
      7. joint_pain
      8. stomach_pain
      9. acidity
      10. ulcers_on_tongue
      11. muscle_wasting
      12. vomiting
      13. burning_micturition
      14. spotting_ urination
      15. fatigue
      16. weight_gain
      17. anxiety
      18. cold_hands_and_fe

In [26]:
import joblib

# 1. Save the Model (The Brain)
joblib.dump(clf, 'doctor_model.joblib')

# 2. Save the Symptom List (The Memory)
# We need this list later so the web app knows the order of columns!
joblib.dump(X.columns.tolist(), 'symptom_list.joblib')

print("‚úÖ Project Successfully Saved using Joblib!")
print("   - Model: doctor_model.joblib")
print("   - Symptoms: symptom_list.joblib")


‚úÖ Project Successfully Saved using Joblib!
   - Model: doctor_model.joblib
   - Symptoms: symptom_list.joblib
