In [1]:
import re

import pandas as pd

In [2]:
df = pd.read_csv('Training.csv')

In [3]:
df.shape

(4920, 133)

In [4]:
df.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection


In [5]:
df['prognosis'].unique().shape

(41,)

In [6]:
df['prognosis'].value_counts()

prognosis
Fungal infection                           120
Allergy                                    120
GERD                                       120
Chronic cholestasis                        120
Drug Reaction                              120
Peptic ulcer diseae                        120
AIDS                                       120
Diabetes                                   120
Gastroenteritis                            120
Bronchial Asthma                           120
Hypertension                               120
Migraine                                   120
Cervical spondylosis                       120
Paralysis (brain hemorrhage)               120
Jaundice                                   120
Malaria                                    120
Chicken pox                                120
Dengue                                     120
Typhoid                                    120
hepatitis A                                120
Hepatitis B                                120
Hep

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [8]:
x = df.drop(columns=['prognosis'])
y = df['prognosis']

In [9]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

In [10]:
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((3444, 132), (3444,), (1476, 132), (1476,))

In [11]:
df.isnull().sum()

itching                 0
skin_rash               0
nodal_skin_eruptions    0
continuous_sneezing     0
shivering               0
                       ..
inflammatory_nails      0
blister                 0
red_sore_around_nose    0
yellow_crust_ooze       0
prognosis               0
Length: 133, dtype: int64

In [12]:
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import (
    RandomForestClassifier, ExtraTreesClassifier,
    GradientBoostingClassifier, AdaBoostClassifier
)
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier

model_dict = {
    'logistic_reg': LogisticRegression(),
    'svc': SVC(probability=True),
    'knn': KNeighborsClassifier(),
    'decision_tree': DecisionTreeClassifier(),
    'random_forest': RandomForestClassifier(),
    'extra_trees': ExtraTreesClassifier(),
    'gradient_boosting': GradientBoostingClassifier(),
    'adaboost': AdaBoostClassifier(algorithm='SAMME'),
    'naive_bayes': GaussianNB(),
    'xgboost': XGBClassifier()
}

In [14]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
model_confusion = {}
def evaluate_models(x_train, x_test, y_train, y_test, model_dict):

    results = []

    for name, model in model_dict.items():
        try:
            model.fit(x_train, y_train)
            y_pred = model.predict(x_test)

            acc = accuracy_score(y_test, y_pred)
            prec = precision_score(y_test, y_pred, average='macro', zero_division=0)
            rec = recall_score(y_test, y_pred, average='macro', zero_division=0)
            f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)

            results.append({
                'Model': name,
                'Accuracy': acc,
                'Precision': prec,
                'Recall': rec,
                'F1 Score': f1
            })
            model_confusion[name] = confusion_matrix(y_test, y_pred)
        except Exception as e:
            print(f"Model {name} failed with error: {e}")


    return results


In [15]:
results = evaluate_models(x_train, x_test, y_train, y_test, model_dict)

In [16]:
pd.DataFrame(results, columns=['Model', 'Accuracy', 'Precision', 'Recall', 'F1 Score'])

Unnamed: 0,Model,Accuracy,Precision,Recall,F1 Score
0,logistic_reg,1.0,1.0,1.0,1.0
1,svc,1.0,1.0,1.0,1.0
2,knn,1.0,1.0,1.0,1.0
3,decision_tree,1.0,1.0,1.0,1.0
4,random_forest,1.0,1.0,1.0,1.0
5,extra_trees,1.0,1.0,1.0,1.0
6,gradient_boosting,1.0,1.0,1.0,1.0
7,adaboost,0.102981,0.098188,0.110644,0.092657
8,naive_bayes,1.0,1.0,1.0,1.0
9,xgboost,1.0,1.0,1.0,1.0


In [17]:
for model_name, matrix in model_confusion.items():
    print(model_name, matrix)

logistic_reg [[32  0  0 ...  0  0  0]
 [ 0 39  0 ...  0  0  0]
 [ 0  0 41 ...  0  0  0]
 ...
 [ 0  0  0 ... 36  0  0]
 [ 0  0  0 ...  0 37  0]
 [ 0  0  0 ...  0  0 39]]
svc [[32  0  0 ...  0  0  0]
 [ 0 39  0 ...  0  0  0]
 [ 0  0 41 ...  0  0  0]
 ...
 [ 0  0  0 ... 36  0  0]
 [ 0  0  0 ...  0 37  0]
 [ 0  0  0 ...  0  0 39]]
knn [[32  0  0 ...  0  0  0]
 [ 0 39  0 ...  0  0  0]
 [ 0  0 41 ...  0  0  0]
 ...
 [ 0  0  0 ... 36  0  0]
 [ 0  0  0 ...  0 37  0]
 [ 0  0  0 ...  0  0 39]]
decision_tree [[32  0  0 ...  0  0  0]
 [ 0 39  0 ...  0  0  0]
 [ 0  0 41 ...  0  0  0]
 ...
 [ 0  0  0 ... 36  0  0]
 [ 0  0  0 ...  0 37  0]
 [ 0  0  0 ...  0  0 39]]
random_forest [[32  0  0 ...  0  0  0]
 [ 0 39  0 ...  0  0  0]
 [ 0  0 41 ...  0  0  0]
 ...
 [ 0  0  0 ... 36  0  0]
 [ 0  0  0 ...  0 37  0]
 [ 0  0  0 ...  0  0 39]]
extra_trees [[32  0  0 ...  0  0  0]
 [ 0 39  0 ...  0  0  0]
 [ 0  0 41 ...  0  0  0]
 ...
 [ 0  0  0 ... 36  0  0]
 [ 0  0  0 ...  0 37  0]
 [ 0  0  0 ...  0  0 39]]
gra

In [18]:
svc = SVC(probability=True)
svc.fit(x_train, y_train)
y_pred = svc.predict(x_test)
acc = accuracy_score(y_test, y_pred)
acc

1.0

In [19]:
import pickle
pickle.dump(svc, open('svc.pkl', 'wb'))

In [20]:
svc = pickle.load(open('svc.pkl', 'rb'))

In [21]:
x_test

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,pus_filled_pimples,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze
373,0,1,0,0,0,0,0,0,0,0,...,0,1,1,0,0,0,0,0,0,0
4916,0,1,0,0,0,0,0,0,0,0,...,1,1,1,0,0,0,0,0,0,0
1550,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3081,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3857,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3649,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1694,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4767,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3721,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
pred = svc.predict(x_test.iloc[0].values.reshape(1, -1))



In [23]:
print(pred)

[2]


In [24]:
y_test[0]

np.int64(2)

In [25]:
print(svc.predict(x_test.iloc[10].values.reshape(1, -1)), y_test[10])

[38] 38




In [26]:
symptoms_df = pd.read_csv('symptoms.csv')

In [27]:
precautions_df = pd.read_csv('precautions.csv')

In [28]:
symptoms_df.head()

Unnamed: 0.1,Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4
0,0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches
1,1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,
2,2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,
3,3,Fungal infection,itching,skin_rash,dischromic _patches,
4,4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,


In [29]:
precautions_df.head()

Unnamed: 0.1,Unnamed: 0,Disease,Precaution_1,Precaution_2,Precaution_3,Precaution_4
0,0,Drug Reaction,stop irritation,consult nearest hospital,stop taking drug,follow up
1,1,Malaria,Consult nearest hospital,avoid oily food,avoid non veg food,keep mosquitos out
2,2,Allergy,apply calamine,cover area with bandage,,use ice to compress itching
3,3,Hypothyroidism,reduce stress,exercise,eat healthy,get proper sleep
4,4,Psoriasis,wash hands with warm soapy water,stop bleeding using pressure,consult doctor,salt baths


In [30]:
workouts_df = pd.read_csv('workouts.csv')

In [31]:
description_df = pd.read_csv('description.csv')

In [32]:
medications_df = pd.read_csv('medications.csv')

In [33]:
diets_df = pd.read_csv('diets.csv')

In [34]:
with open('label_encoder.pkl', 'wb') as f:
    pickle.dump(label_encoder, f)


In [35]:
label_encoder.inverse_transform([2])[0]

'Acne'

In [36]:
label_encoder.transform(['Acne'])[0]

np.int64(2)

In [37]:
import numpy as np

def get_predicted_value(patient_symptoms):
    output = []
    for item in df.columns[:-1]:
        if item in patient_symptoms:
            output.append(1)
        else:
            output.append(0)
    return label_encoder.inverse_transform([svc.predict([output])])[0]


In [38]:
user_symptoms = ['joint_pain', 'chills', 'acidity']

In [39]:
predicted_disease = get_predicted_value(user_symptoms)

  y = column_or_1d(y, warn=True)


In [40]:
predicted_disease

'Allergy'

In [41]:
# find description
desc = description_df[description_df['Disease'] == predicted_disease]['Description']

In [42]:
desc = " ".join(desc.values)

In [43]:
# find precautions
pre = precautions_df[precautions_df['Disease'] == predicted_disease][['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']]

In [44]:
pre = [col for col in pre.values]

In [45]:
pre

[array(['apply calamine', 'cover area with bandage', nan,
        'use ice to compress itching'], dtype=object)]

In [46]:
import ast
import warnings
warnings.filterwarnings('ignore')

In [47]:
def helper(disease) :
    description = " ".join(description_df[description_df['Disease'] == disease]['Description'])
    precaution = precautions_df[precautions_df['Disease'] == disease][['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']].values.tolist()[0]
    medication = medications_df[medications_df['Disease'] == disease]['Medication'].values[0]
    diet = diets_df[diets_df['Disease'] == disease]['Diet'].values[0]
    workout = workouts_df[workouts_df['disease'] == disease]['workout'].values.tolist()
    print('-----------------------------Predicted Disease---------------------------')
    print()
    print(disease)
    print()
    print('-----------------------------Description---------------------------------')
    print()
    print(description)
    print()
    print('-----------------------------Precaution----------------------------------')
    print()
    i=1
    for p_i in precaution:
        if p_i is np.nan:
            continue
        print(f"{i}. {p_i}")
        i += 1
    print()
    print('-----------------------------Medication----------------------------------')
    print()
    i = 1
    for m_i in ast.literal_eval(medication):
        if m_i is np.nan:
            continue
        print(f"{i}. {m_i}")
        i += 1
    print()
    print('-----------------------------Diet----------------------------------------')
    print()
    i = 1
    for d_i in ast.literal_eval(diet):
        if d_i is np.nan:
            continue
        print(f"{i}. {d_i}")
        i += 1
    print()
    print('-----------------------------Workout----------------------------------------')
    print()
    i = 1
    for w_i in workout:
        if w_i is np.nan:
            continue
        print(f"{i}. {w_i}")
        i += 1




In [48]:
helper(get_predicted_value(['itching', 'skin_rash']))

-----------------------------Predicted Disease---------------------------

Fungal infection

-----------------------------Description---------------------------------

Fungal infection is a common skin condition caused by fungi.

-----------------------------Precaution----------------------------------

1. bath twice
2. use detol or neem in bathing water
3. keep infected area dry
4. use clean cloths

-----------------------------Medication----------------------------------

1. Antifungal Cream
2. Fluconazole
3. Terbinafine
4. Clotrimazole
5. Ketoconazole

-----------------------------Diet----------------------------------------

1. Antifungal Diet
2. Probiotics
3. Garlic
4. Coconut oil
5. Turmeric

-----------------------------Workout----------------------------------------

1. Avoid sugary foods
2. Consume probiotics
3. Increase intake of garlic
4. Include yogurt in diet
5. Limit processed foods
6. Stay hydrated
7. Consume green tea
8. Eat foods rich in zinc
9. Include turmeric in die

In [58]:
def bring_name_in_right_format(name) :
    if '_' in name:
        return " ".join([a.strip() for a in name.split('_')])
    return name


In [59]:
correct_name = []
for col in df.columns:
    correct_name.append(bring_name_in_right_format(col))

In [60]:
correct_name

['itching',
 'skin rash',
 'nodal skin eruptions',
 'continuous sneezing',
 'shivering',
 'chills',
 'joint pain',
 'stomach pain',
 'acidity',
 'ulcers on tongue',
 'muscle wasting',
 'vomiting',
 'burning micturition',
 'spotting urination',
 'fatigue',
 'weight gain',
 'anxiety',
 'cold hands and feets',
 'mood swings',
 'weight loss',
 'restlessness',
 'lethargy',
 'patches in throat',
 'irregular sugar level',
 'cough',
 'high fever',
 'sunken eyes',
 'breathlessness',
 'sweating',
 'dehydration',
 'indigestion',
 'headache',
 'yellowish skin',
 'dark urine',
 'nausea',
 'loss of appetite',
 'pain behind the eyes',
 'back pain',
 'constipation',
 'abdominal pain',
 'diarrhoea',
 'mild fever',
 'yellow urine',
 'yellowing of eyes',
 'acute liver failure',
 'fluid overload',
 'swelling of stomach',
 'swelled lymph nodes',
 'malaise',
 'blurred and distorted vision',
 'phlegm',
 'throat irritation',
 'redness of eyes',
 'sinus pressure',
 'runny nose',
 'congestion',
 'chest pain',
 