In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
disease = pd.read_csv('Downloads/archive (8)/Training.csv')
disease.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection


In [3]:
disease.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4920 entries, 0 to 4919
Columns: 133 entries, itching to prognosis
dtypes: int64(132), object(1)
memory usage: 5.0+ MB


In [4]:
disease.isnull().sum()

itching                 0
skin_rash               0
nodal_skin_eruptions    0
continuous_sneezing     0
shivering               0
                       ..
inflammatory_nails      0
blister                 0
red_sore_around_nose    0
yellow_crust_ooze       0
prognosis               0
Length: 133, dtype: int64

In [5]:
disease['prognosis'].unique()

array(['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis',
       'Drug Reaction', 'Peptic ulcer diseae', 'AIDS', 'Diabetes ',
       'Gastroenteritis', 'Bronchial Asthma', 'Hypertension ', 'Migraine',
       'Cervical spondylosis', 'Paralysis (brain hemorrhage)', 'Jaundice',
       'Malaria', 'Chicken pox', 'Dengue', 'Typhoid', 'hepatitis A',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Alcoholic hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia',
       'Dimorphic hemmorhoids(piles)', 'Heart attack', 'Varicose veins',
       'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia',
       'Osteoarthristis', 'Arthritis',
       '(vertigo) Paroymsal  Positional Vertigo', 'Acne',
       'Urinary tract infection', 'Psoriasis', 'Impetigo'], dtype=object)

In [6]:
#The data is already preprocessed so, we don't need to do data preprocessing 
#So, we will only apply machine learning models and train the model

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X = disease.drop('prognosis',axis=1)
y = disease['prognosis']

In [9]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
le.fit(y)
Y= le.transform(y)

In [10]:
X_train,X_test,y_train,y_test = train_test_split(X,Y,test_size=0.3,random_state=20)

In [11]:
from sklearn.datasets import make_classification
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier , GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score , confusion_matrix

In [12]:
models = {
    "SVM" : SVC(kernel = 'linear'),
    "RandomForest" : RandomForestClassifier(n_estimators = 100,random_state=42),
    "GradientBoost" : GradientBoostingClassifier(n_estimators = 100,random_state=42),
    "KNN" : KNeighborsClassifier(n_neighbors=5),
    "NaiveBayes" : MultinomialNB()  
}

In [13]:
#Now we are creating a for loop to check for each model
#So this for loop will take model_name and model from the dictionary models \
#generally we write models.key or models.value for getting either key or value
#But here we need both , so we are writing models.items to access both the values

for model_name, model in models.items():
    #Training the model
    model.fit(X_train,y_train)

    #Testing the model
    prediction = model.predict(X_test)

    #Checking the accuracy 
    accuracy = accuracy_score(y_test,prediction)
    print(f"{model_name}, Accuracy : {accuracy}" )

    #Calculating the confusion matrix 
    confusionmatrix = confusion_matrix(y_test,prediction)
    print(f"{model_name}, Confusion Matrix : {confusionmatrix}")
    print("\n")



SVM, Accuracy : 1.0
SVM, Confusion Matrix : [[40  0  0 ...  0  0  0]
 [ 0 43  0 ...  0  0  0]
 [ 0  0 28 ...  0  0  0]
 ...
 [ 0  0  0 ... 34  0  0]
 [ 0  0  0 ...  0 41  0]
 [ 0  0  0 ...  0  0 31]]


RandomForest, Accuracy : 1.0
RandomForest, Confusion Matrix : [[40  0  0 ...  0  0  0]
 [ 0 43  0 ...  0  0  0]
 [ 0  0 28 ...  0  0  0]
 ...
 [ 0  0  0 ... 34  0  0]
 [ 0  0  0 ...  0 41  0]
 [ 0  0  0 ...  0  0 31]]


GradientBoost, Accuracy : 1.0
GradientBoost, Confusion Matrix : [[40  0  0 ...  0  0  0]
 [ 0 43  0 ...  0  0  0]
 [ 0  0 28 ...  0  0  0]
 ...
 [ 0  0  0 ... 34  0  0]
 [ 0  0  0 ...  0 41  0]
 [ 0  0  0 ...  0  0 31]]


KNN, Accuracy : 1.0
KNN, Confusion Matrix : [[40  0  0 ...  0  0  0]
 [ 0 43  0 ...  0  0  0]
 [ 0  0 28 ...  0  0  0]
 ...
 [ 0  0  0 ... 34  0  0]
 [ 0  0  0 ...  0 41  0]
 [ 0  0  0 ...  0  0 31]]


NaiveBayes, Accuracy : 1.0
NaiveBayes, Confusion Matrix : [[40  0  0 ...  0  0  0]
 [ 0 43  0 ...  0  0  0]
 [ 0  0 28 ...  0  0  0]
 ...
 [ 0  0  0 ... 3

In [14]:
#among all the above algorithms, will choose Random Forest 


In [15]:
RandomForest = RandomForestClassifier(n_estimators=100,random_state=42)
RandomForest.fit(X_train,y_train)
pred= RandomForest.predict(X_test)
accuracy_score(y_test,pred)

1.0

In [20]:
#Saving the model 
import pickle
#pickle.dump(RandomForest,open("RandomForest.pkl","wb"))
#for saving we use wb i.e write and for loading the file we use rb
#After saving commenting this cell of code for not to save the file again and again

In [19]:
#load the model

In [21]:
RandomForest = pickle.load(open("RandomForest.pkl","rb"))

In [29]:
#testing the model
RandomForest.predict(X_test.iloc[0].values.reshape(1,-1))

#X_test.iloc[0].values.reshape(1,-1) , this code is to get the row in X_test 
#then we give it to the model and predict 



array([40])

In [31]:
pred[0]

#here we are cheking whether in the original and predicted outcome is same or not

40

In [32]:
print("Original Outcome: ",RandomForest.predict(X_test.iloc[0].values.reshape(1,-1)) )
print("Predicted Outcome: ", pred[0])

Original Outcome:  [40]
Predicted Outcome:  40




In [33]:
#The model is working well
#now we are applying logic for recommendation

In [35]:
sym = pd.read_csv("Downloads/archive (8)/symtoms_df.csv")
precautions = pd.read_csv("Downloads/archive (8)/precautions_df.csv")
medications = pd.read_csv("Downloads/archive (8)/medications.csv")
diet = pd.read_csv("Downloads/archive (8)/diets.csv")
description = pd.read_csv("Downloads/archive (8)/description.csv")
workout = pd.read_csv("Downloads/archive (8)/workout_df.csv")

In [61]:
symptoms_dict = {'itching': 0, 'skin_rash': 1, 'nodal_skin_eruptions': 2, 'continuous_sneezing': 3, 'shivering': 4, 'chills': 5, 'joint_pain': 6, 'stomach_pain': 7, 'acidity': 8, 'ulcers_on_tongue': 9, 'muscle_wasting': 10, 'vomiting': 11, 'burning_micturition': 12, 'spotting_ urination': 13, 'fatigue': 14, 'weight_gain': 15, 'anxiety': 16, 'cold_hands_and_feets': 17, 'mood_swings': 18, 'weight_loss': 19, 'restlessness': 20, 'lethargy': 21, 'patches_in_throat': 22, 'irregular_sugar_level': 23, 'cough': 24, 'high_fever': 25, 'sunken_eyes': 26, 'breathlessness': 27, 'sweating': 28, 'dehydration': 29, 'indigestion': 30, 'headache': 31, 'yellowish_skin': 32, 'dark_urine': 33, 'nausea': 34, 'loss_of_appetite': 35, 'pain_behind_the_eyes': 36, 'back_pain': 37, 'constipation': 38, 'abdominal_pain': 39, 'diarrhoea': 40, 'mild_fever': 41, 'yellow_urine': 42, 'yellowing_of_eyes': 43, 'acute_liver_failure': 44, 'fluid_overload': 45, 'swelling_of_stomach': 46, 'swelled_lymph_nodes': 47, 'malaise': 48, 'blurred_and_distorted_vision': 49, 'phlegm': 50, 'throat_irritation': 51, 'redness_of_eyes': 52, 'sinus_pressure': 53, 'runny_nose': 54, 'congestion': 55, 'chest_pain': 56, 'weakness_in_limbs': 57, 'fast_heart_rate': 58, 'pain_during_bowel_movements': 59, 'pain_in_anal_region': 60, 'bloody_stool': 61, 'irritation_in_anus': 62, 'neck_pain': 63, 'dizziness': 64, 'cramps': 65, 'bruising': 66, 'obesity': 67, 'swollen_legs': 68, 'swollen_blood_vessels': 69, 'puffy_face_and_eyes': 70, 'enlarged_thyroid': 71, 'brittle_nails': 72, 'swollen_extremeties': 73, 'excessive_hunger': 74, 'extra_marital_contacts': 75, 'drying_and_tingling_lips': 76, 'slurred_speech': 77, 'knee_pain': 78, 'hip_joint_pain': 79, 'muscle_weakness': 80, 'stiff_neck': 81, 'swelling_joints': 82, 'movement_stiffness': 83, 'spinning_movements': 84, 'loss_of_balance': 85, 'unsteadiness': 86, 'weakness_of_one_body_side': 87, 'loss_of_smell': 88, 'bladder_discomfort': 89, 'foul_smell_of urine': 90, 'continuous_feel_of_urine': 91, 'passage_of_gases': 92, 'internal_itching': 93, 'toxic_look_(typhos)': 94, 'depression': 95, 'irritability': 96, 'muscle_pain': 97, 'altered_sensorium': 98, 'red_spots_over_body': 99, 'belly_pain': 100, 'abnormal_menstruation': 101, 'dischromic _patches': 102, 'watering_from_eyes': 103, 'increased_appetite': 104, 'polyuria': 105, 'family_history': 106, 'mucoid_sputum': 107, 'rusty_sputum': 108, 'lack_of_concentration': 109, 'visual_disturbances': 110, 'receiving_blood_transfusion': 111, 'receiving_unsterile_injections': 112, 'coma': 113, 'stomach_bleeding': 114, 'distention_of_abdomen': 115, 'history_of_alcohol_consumption': 116, 'fluid_overload.1': 117, 'blood_in_sputum': 118, 'prominent_veins_on_calf': 119, 'palpitations': 120, 'painful_walking': 121, 'pus_filled_pimples': 122, 'blackheads': 123, 'scurring': 124, 'skin_peeling': 125, 'silver_like_dusting': 126, 'small_dents_in_nails': 127, 'inflammatory_nails': 128, 'blister': 129, 'red_sore_around_nose': 130, 'yellow_crust_ooze': 131}
disease_list = {15: 'Fungal infection', 4: 'Allergy', 16: 'GERD', 9: 'Chronic cholestasis', 14: 'Drug Reaction', 33: 'Peptic ulcer diseae', 1: 'AIDS', 12: 'Diabetes ', 17: 'Gastroenteritis', 6: 'Bronchial Asthma', 23: 'Hypertension ', 30: 'Migraine', 7: 'Cervical spondylosis', 32: 'Paralysis (brain hemorrhage)', 28: 'Jaundice', 29: 'Malaria', 8: 'Chicken pox', 11: 'Dengue', 37: 'Typhoid', 40: 'hepatitis A', 19: 'Hepatitis B', 20: 'Hepatitis C', 21: 'Hepatitis D', 22: 'Hepatitis E', 3: 'Alcoholic hepatitis', 36: 'Tuberculosis', 10: 'Common Cold', 34: 'Pneumonia', 13: 'Dimorphic hemmorhoids(piles)', 18: 'Heart attack', 39: 'Varicose veins', 26: 'Hypothyroidism', 24: 'Hyperthyroidism', 25: 'Hypoglycemia', 31: 'Osteoarthristis', 5: 'Arthritis', 0: '(vertigo) Paroymsal  Positional Vertigo', 2: 'Acne', 38: 'Urinary tract infection', 35: 'Psoriasis', 27: 'Impetigo'}

#function to predict disease based on the symptoms given by the user using the model
def predicted_values(patient_symptoms):

    #first of all creating an array of zeros which contains the no of values equal to symptons dict i.e 132
    input_vector= np.zeros(len(symptoms_dict))


    #Now creating a loop for each symptom given by the user
    for item in patient_symptoms:
        input_vector[symptoms_dict[item]] = 1  #this line of code will search the patient symptoms in the symptoms dict and set them as 1 in input vector
    return disease_list[RandomForest.predict([input_vector])[0]] 
    #function will return the disease from the diseases list which is predicted by the model based on the input vector


In [62]:
#Now testing the predicted_vales function
symptoms = input("Enter your symptoms......")
#we get the input in string so converting it into list ans splitting the input 
user_symptoms = [s.strip() for s in symptoms.split(',')]
user_symptoms = [sym.strip("[]' ") for sym in user_symptoms]
predicted_disease = predicted_values(user_symptoms)

Enter your symptoms...... shivering, chills,itching,skin_rash




In [63]:
predicted_disease

'Allergy'

In [69]:
workout.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 410 entries, 0 to 409
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Unnamed: 0.1  410 non-null    int64 
 1   Unnamed: 0    410 non-null    int64 
 2   disease       410 non-null    object
 3   workout       410 non-null    object
dtypes: int64(2), object(2)
memory usage: 12.9+ KB


In [72]:
#Now creating the function that will give recommendation like medication, diet , workout and many more based on the predicted disease
def recommendation_func(disease):
     #For description of the disease we will use 'description' dataset
    desc = description[description['Disease'] == disease]['Description']
    desc = " ".join([w for w in desc]) #to show the full description

    pre = precautions[precautions['Disease']== disease][['Precaution_1','Precaution_2','Precaution_3','Precaution_4']]
    pre = [col for col in pre.values]

    med = medications[medications['Disease'] == disease]['Medication']
    med = [med for med in med.values]

    diets = diet[diet['Disease'] == disease]['Diet']
    diets = [diets for diets in diets.values]

    workouts  = workout[workout['disease']== disease]['workout']


    return desc,pre,med,diets,workouts
     

In [76]:
#now checking this function using the above test case

desc, pre, med, die, wrkout = recommendation_func(predicted_disease)

print("=================predicted disease============")
print(predicted_disease)
print("=================description==================")
print(desc)
print("=================precautions==================")
i = 1
for p_i in pre[0]:
    print(i, ": ", p_i)
    i += 1

print("=================medications==================")
for m_i in med:
    print(i, ": ", m_i)
    i += 1

print("=================workout==================")
for w_i in wrkout:
    print(i, ": ", w_i)
    i += 1

print("=================diets==================")
for d_i in die:
    print(i, ": ", d_i)
    i += 1

Allergy
Allergy is an immune system reaction to a substance in the environment.
1 :  apply calamine
2 :  cover area with bandage
3 :  nan
4 :  use ice to compress itching
5 :  ['Antihistamines', 'Decongestants', 'Epinephrine', 'Corticosteroids', 'Immunotherapy']
6 :  Avoid allergenic foods
7 :  Consume anti-inflammatory foods
8 :  Include omega-3 fatty acids
9 :  Stay hydrated
10 :  Eat foods rich in vitamin C
11 :  Include quercetin-rich foods
12 :  Consume local honey
13 :  Limit processed foods
14 :  Include ginger in diet
15 :  Avoid artificial additives
16 :  ['Elimination Diet', 'Omega-3-rich foods', 'Vitamin C-rich foods', 'Quercetin-rich foods', 'Probiotics']


In [3]:
#now we are done with the recommendation
#now deploying using pycharm flask app
import sklearn
print(sklearn.__version__)

1.2.2
