In [27]:
import pandas as pd

# Load the training dataset
dataset = pd.read_csv("Train.csv")

# ✅ Select only the first 86 features + target
X = dataset.iloc[:, 1:-1]  # 86 features
y = dataset['prognosis']

# Check the number of features
print("Number of Features Used for Training:", X.shape[1])  # Should print 86


Number of Features Used for Training: 131


In [28]:
dataset.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection


In [29]:
from sklearn.model_selection import train_test_split #convert to training and testing datasets
from sklearn.preprocessing import LabelEncoder #categories into numbers

X=dataset.drop("prognosis",axis=1)#row-wise then 0
y=dataset['prognosis']

# ecoding prognonsis
le = LabelEncoder()
le.fit(y)
Y = le.transform(y)

In [30]:
X_train, X_test, y_train, y_test= train_test_split(X,Y, test_size=0.3, random_state=20)
#0.7 or 70% data in training and test 0.3 or 30% data in testing 
#random state is used to shuffle the data

In [31]:
#training model
from sklearn.datasets import make_classification
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
#random forest train trees in parallel and gradient booster train trees sequentially
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB

from sklearn.metrics import accuracy_score, confusion_matrix # for accuracy
import numpy as np

#create a dictionary to store models
models = {
    'SVC': SVC(kernel='linear'),
    'RandomForest': RandomForestClassifier(n_estimators=100, random_state=42),
    'GradientBoosting': GradientBoostingClassifier(n_estimators=100, random_state=42),
    'KNeighbors': KNeighborsClassifier(n_neighbors=5),
    'MultinomialNB': MultinomialNB()
}

# Loop through the models, train, test, and print results
for model_name, model in models.items():
    # Train the model
    model.fit(X_train, y_train)

    # Test the model
    predictions = model.predict(X_test)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, predictions)
    print(f"{model_name} Accuracy: {accuracy}")

    # Calculate confusion matrix
    cm = confusion_matrix(y_test, predictions)
    print(f"{model_name} Confusion Matrix:")
    print(np.array2string(cm, separator=', '))
    

SVC Accuracy: 1.0
SVC Confusion Matrix:
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,  0, 31]]
RandomForest Accuracy: 1.0
RandomForest Confusion Matrix:
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,  0, 31]]
GradientBoosting Accuracy: 1.0
GradientBoosting Confusion Matrix:
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,  0, 31]]
KNeighbors Accuracy: 1.0
KNeighbors Confusion Matrix:
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,

In [32]:
#single prediction model
#svc
svc = SVC(kernel='linear') #initialize
svc.fit(X_train,y_train) 
ypred = svc.predict(X_test)#makes predictions on test data
accuracy_score(y_test,ypred)

1.0

In [33]:
# save svc
import pickle
pickle.dump(svc,open('svc.pkl','wb'))# saves an object to a file

# load model
svc = pickle.load(open('svc.pkl','rb'))

In [34]:
# test 1:
print("predicted disease :",svc.predict(X_test.iloc[0].values.reshape(1,-1)))
#seclects first row from X_test (.values converts pandas series into numpy array)
#(reshaps 1d array into 2d array)
print("Actual Disease :", y_test[0])

predicted disease : [40]
Actual Disease : 40




In [35]:
# test 2:
print("predicted disease :",svc.predict(X_test.iloc[100].values.reshape(1,-1)))
print("Actual Disease :", y_test[100])

predicted disease : [39]
Actual Disease : 39




In [36]:
# ✅ Load the dataset and use only 86 features
train_data = pd.read_csv("Train.csv")
selected_features = train_data.columns[1:87]  # ✅ Select only 86 symptom columns

# ✅ Create the dictionary with only 86 features
symptoms_dict = {symptom: index for index, symptom in enumerate(selected_features)}


In [37]:
#load database
sym_des = pd.read_csv("symptoms_df.csv")
precautions = pd.read_csv("precautions_df.csv")
workout = pd.read_csv("workout_df.csv")
description = pd.read_csv("description.csv")
medications = pd.read_csv('medications.csv')
diets = pd.read_csv("diets.csv")

In [38]:
symptoms_dict = {
    0: "itching",
    1: "skin_rash",
    2: "nodal_skin_eruptions",
    3: "continuous_sneezing",
    4: "shivering",
    5: "chills",
    6: "joint_pain",
    7: "stomach_pain",
    8: "acidity",
    9: "ulcers_on_tongue",
    10: "muscle_wasting",
    11: "vomiting",
    12: "burning_micturition",
    13: "spotting_urination",
    14: "fatigue",
    15: "weight_gain",
    16: "anxiety",
    17: "cold_hands_and_feets",
    18: "mood_swings",
    19: "weight_loss",
    20: "restlessness",
    21: "lethargy",
    22: "patches_in_throat",
    23: "irregular_sugar_level",
    24: "cough",
    25: "high_fever",
    26: "sunken_eyes",
    27: "breathlessness",
    28: "sweating",
    29: "dehydration",
    30: "indigestion",
    31: "headache",
    32: "yellowish_skin",
    33: "dark_urine",
    34: "nausea",
    35: "loss_of_appetite",
    36: "pain_behind_the_eyes",
    37: "back_pain",
    38: "constipation",
    39: "abdominal_pain",
    40: "diarrhoea",
    41: "mild_fever",
    42: "yellow_urine",
    43: "yellowing_of_eyes",
    44: "acute_liver_failure",
    45: "fluid_overload",
    46: "swelling_of_stomach",
    47: "swelled_lymph_nodes",
    48: "malaise",
    49: "blurred_and_distorted_vision",
    50: "phlegm",
    51: "throat_irritation",
    52: "redness_of_eyes",
    53: "sinus_pressure",
    54: "runny_nose",
    55: "congestion",
    56: "chest_pain",
    57: "weakness_in_limbs",
    58: "fast_heart_rate",
    59: "pain_during_bowel_movements",
    60: "pain_in_anal_region",
    61: "bloody_stool",
    62: "irritation_in_anus",
    63: "neck_pain",
    64: "dizziness",
    65: "cramps",
    66: "bruising",
    67: "obesity",
    68: "swollen_legs",
    69: "swollen_blood_vessels",
    70: "puffy_face_and_eyes",
    71: "enlarged_thyroid",
    72: "brittle_nails",
    73: "swollen_extremeties",
    74: "excessive_hunger",
    75: "extra_marital_contacts",
    76: "drying_and_tingling_lips",
    77: "slurred_speech",
    78: "knee_pain",
    79: "hip_joint_pain",
    80: "muscle_weakness",
    81: "stiff_neck",
    82: "swelling_joints",
    83: "movement_stiffness",
    84: "spinning_movements",
    85: "loss_of_balance",
    86: "unsteadiness",
    87: "weakness_of_one_body_side",
    88: "loss_of_smell",
    89: "bladder_discomfort",
    90: "foul_smell_of_urine",
    91: "continuous_feel_of_urine",
    92: "passage_of_gases",
    93: "internal_itching",
    94: "toxic_look_(typhos)",
    95: "depression",
    96: "irritability",
    97: "muscle_pain",
    98: "altered_sensorium",
    99: "red_spots_over_body",
    100: "belly_pain",
    101: "abnormal_menstruation",
    102: "dischromic_patches",
    103: "watering_from_eyes",
    104: "increased_appetite",
    105: "polyuria",
    106: "family_history",
    107: "mucoid_sputum",
    108: "rusty_sputum",
    109: "lack_of_concentration",
    110: "visual_disturbances",
    111: "receiving_blood_transfusion",
    112: "receiving_unsterile_injections",
    113: "coma",
    114: "stomach_bleeding",
    115: "distention_of_abdomen",
    116: "history_of_alcohol_consumption",
    117: "blood_in_sputum",
    118: "prominent_veins_on_calf",
    119: "palpitations",
    120: "painful_walking",
    121: "pus_filled_pimples",
    122: "blackheads",
    123: "scurring",
    124: "skin_peeling",
    125: "silver_like_dusting",
    126: "small_dents_in_nails",
    127: "inflammatory_nails",
    128: "blister",
    129: "red_sore_around_nose",
    130: "yellow_crust_ooze",
    131: "prognosis"
}

diseases_list = {
    0: "(vertigo) Paroymsal Positional Vertigo",
    1: "AIDS",
    2: "Acne",
    3: "Alcoholic hepatitis",
    4: "Allergy",
    5: "Arthritis",
    6: "Bronchial Asthma",
    7: "Cervical spondylosis",
    8: "Chicken pox",
    9: "Chronic cholestasis",
    10: "Common Cold",
    11: "Dengue",
    12: "Diabetes",
    13: "Dimorphic hemmorhoids (piles)",
    14: "Drug Reaction",
    15: "Fungal infection",
    16: "GERD",
    17: "Gastroenteritis",
    18: "Heart attack",
    19: "Hepatitis B",
    20: "Hepatitis C",
    21: "Hepatitis D",
    22: "Hepatitis E",
    23: "Hypertension",
    24: "Hyperthyroidism",
    25: "Hypoglycemia",
    26: "Hypothyroidism",
    27: "Impetigo",
    28: "Jaundice",
    29: "Malaria",
    30: "Migraine",
    31: "Osteoarthritis",
    32: "Paralysis (brain hemorrhage)",
    33: "Peptic ulcer disease",
    34: "Pneumonia",
    35: "Psoriasis",
    36: "Tuberculosis",
    37: "Typhoid",
    38: "Urinary tract infection",
    39: "Varicose veins",
    40: "Hepatitis A"
}

#============================================================
# custome and helping functions
#==========================helper funtions================
def helper(dis):
    desc = description[description['Disease'] == dis]['Description']
    desc = " ".join([w for w in desc])

    pre = precautions[precautions['Disease'] == dis][['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']]
    pre = [col for col in pre.values]

    med = medications[medications['Disease'] == dis]['Medication']
    med = [med for med in med.values]

    die = diets[diets['Disease'] == dis]['Diet']
    die = [die for die in die.values]

    wrkout = workout[workout['disease'] == dis] ['workout']


    return desc,pre,med,die,wrkout

In [39]:
def get_predicted_value(patient_symptoms):
    input_vector = np.zeros(132)  # Ensure 132 features are always provided

    for symptom in patient_symptoms:
        if symptom in symptoms_dict.values():  # Ensure correct lookup
            index = list(symptoms_dict.values()).index(symptom)
            input_vector[index] = 1

    predicted_disease = diseases_list[svc.predict([input_vector])[0]]
    return predicted_disease

In [40]:
# Test 1
# Split the user's input into a list of symptoms (assuming they are comma-separated) # itching,skin_rash,nodal_skin_eruptions
import numpy as np

# Get user input
symptoms = input("Enter your symptoms (comma-separated): ")
user_symptoms = [s.strip("[]' ") for s in symptoms.split(',')]

# Ensure required functions are available
try:
    predicted_disease = get_predicted_value(user_symptoms)
    desc, pre, med, die, wrkout = helper(predicted_disease)
    
    print("================= Predicted Disease =============")
    print(predicted_disease)
    print("================= Description ==================")
    print(desc)
    
    print("================= Precautions ==================")
    for i, p_i in enumerate(pre, start=1):  # Directly iterate if `pre` is a list
        print(f"{i}: {p_i}")

    print("================= Medications ==================")
    for i, m_i in enumerate(med, start=1):
        print(f"{i}: {m_i}")

    print("================= Workout ==================")
    for i, w_i in enumerate(wrkout, start=1):
        print(f"{i}: {w_i}")

    print("================= Diets ==================")
    for i, d_i in enumerate(die, start=1):
        print(f"{i}: {d_i}")

except NameError as e:
    print(f"Error: {e}. Ensure `get_predicted_value` and `helper` are defined.")
except Exception as e:
    print(f"Unexpected error: {e}")


Paralysis (brain hemorrhage)
Paralysis (brain hemorrhage) refers to the loss of muscle function due to bleeding in the brain.
1: ['massage' 'eat healthy' 'exercise' 'consult doctor']
1: ['Blood thinners', 'Clot-dissolving medications', 'Anticonvulsants', 'Physical therapy', 'Occupational therapy']
1: Follow a balanced and nutritious diet
2: Include lean proteins
3: Consume nutrient-rich foods
4: Stay hydrated
5: Include healthy fats
6: Limit sugary foods and beverages
7: Include antioxidants
8: Consume foods rich in vitamin K
9: Consult a healthcare professional
10: Manage stress
1: ['Heart-Healthy Diet', 'Low-sodium foods', 'Fruits and vegetables', 'Whole grains', 'Lean proteins']




In [43]:
# Test 1
# Split the user's input into a list of symptoms (assuming they are comma-separated) # yellow_crust_ooze,red_sore_around_nose,small_dents_in_nails,inflammatory_nails,blister
symptoms = input("Enter your symptoms.......")
user_symptoms = [s.strip() for s in symptoms.split(',')]
# Remove any extra characters, if any
user_symptoms = [symptom.strip("[]' ") for symptom in user_symptoms]
predicted_disease = get_predicted_value(user_symptoms)

desc, pre, med, die, wrkout = helper(predicted_disease)

print("=================predicted disease============")
print(predicted_disease)
print("=================description==================")
print(desc)
print("=================precautions==================")
i = 1
for p_i in pre[0]:
    print(i, ": ", p_i)
    i += 1

print("=================medications==================")
for m_i in med:
    print(i, ": ", m_i)
    i += 1

print("=================workout==================")
for w_i in wrkout:
    print(i, ": ", w_i)
    i += 1

print("=================diets==================")
for d_i in die:
    print(i, ": ", d_i)
    i += 1


Fungal infection
Fungal infection is a common skin condition caused by fungi.
1 :  bath twice
2 :  use detol or neem in bathing water
3 :  keep infected area dry
4 :  use clean cloths
5 :  ['Antifungal Cream', 'Fluconazole', 'Terbinafine', 'Clotrimazole', 'Ketoconazole']
6 :  Avoid sugary foods
7 :  Consume probiotics
8 :  Increase intake of garlic
9 :  Include yogurt in diet
10 :  Limit processed foods
11 :  Stay hydrated
12 :  Consume green tea
13 :  Eat foods rich in zinc
14 :  Include turmeric in diet
15 :  Eat fruits and vegetables
16 :  ['Antifungal Diet', 'Probiotics', 'Garlic', 'Coconut oil', 'Turmeric']




In [42]:
# let's use pycharm flask app
# but install this version in pycharm
import sklearn
print(sklearn.__version__)

1.2.2
