In [1]:
import pandas as pd
import numpy as np

In [2]:
training_df = pd.read_csv('Training.csv')

In [3]:
training_df.shape

(4920, 133)

In [4]:
training_df['prognosis'].unique()

array(['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis',
       'Drug Reaction', 'Peptic ulcer diseae', 'AIDS', 'Diabetes ',
       'Gastroenteritis', 'Bronchial Asthma', 'Hypertension ', 'Migraine',
       'Cervical spondylosis', 'Paralysis (brain hemorrhage)', 'Jaundice',
       'Malaria', 'Chicken pox', 'Dengue', 'Typhoid', 'hepatitis A',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Alcoholic hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia',
       'Dimorphic hemmorhoids(piles)', 'Heart attack', 'Varicose veins',
       'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia',
       'Osteoarthristis', 'Arthritis',
       '(vertigo) Paroymsal  Positional Vertigo', 'Acne',
       'Urinary tract infection', 'Psoriasis', 'Impetigo'], dtype=object)

In [6]:
 #Train test split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

X = training_df.drop('prognosis', axis = 1)
y = training_df['prognosis']

le = LabelEncoder()
le.fit(y)
Y = le.transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.3, random_state = 20)

In [7]:
#Training Top models
from sklearn.datasets import make_classification
from sklearn.svm import SVC 
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix

In [8]:
#Create a dictionary to store models
models = {"SVC":SVC(kernel = 'linear'),
         "RandomForest":RandomForestClassifier(n_estimators = 100, random_state = 42),
         "GrandientBoosting":GradientBoostingClassifier(n_estimators = 100, random_state = 42),
         "KNeighbors":KNeighborsClassifier(n_neighbors = 5),
         "MultinomialNB":MultinomialNB()
         }
for model_name, model in models.items():
    #train model
    model.fit(X_train,y_train)

    #test model
    predictions = model.predict(X_test)

    #calculate accuracy score
    accuracy = accuracy_score(y_test, predictions)

    #calculate confusion matrix
    cm = confusion_matrix(y_test, predictions)

    print(f"{model_name} Accuracy is {accuracy}")
    print(f"{model_name} Confusion Matrix: ")
    print(np.array2string(cm, separator = ", "))



SVC Accuracy is 1.0
SVC Confusion Matrix: 
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,  0, 31]]
RandomForest Accuracy is 1.0
RandomForest Confusion Matrix: 
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,  0, 31]]
GrandientBoosting Accuracy is 1.0
GrandientBoosting Confusion Matrix: 
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,  0, 31]]
KNeighbors Accuracy is 1.0
KNeighbors Confusion Matrix: 
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0

In [9]:
#Single Predictions
svc = SVC(kernel = "linear")
svc.fit(X_train,y_train)
ypred = svc.predict(X_test)
accuracy_score(y_test, ypred)

1.0

In [19]:
#Saving model
import pickle

pickle.dump(svc, open("models/svc.pkl", 'wb'))

In [21]:
#Load model

svc = pickle.load(open("models/svc.pkl",'rb'))

In [10]:
#test no 1

print("Predicted Label: ", svc.predict(X_test.iloc[0].values.reshape(1,-1)))
print("Actual Label: ", y_test[0])

Predicted Label:  [40]
Actual Label:  40




In [11]:
#test no 2
print("Predicted Label: ", svc.predict(X_test.iloc[10].values.reshape(1,-1)))
print("Actual Label: ", y_test[10])

Predicted Label:  [20]
Actual Label:  20




# Medicine Recommendation System

In [12]:
#Load datasets
symptoms_desc = pd.read_csv('datasets/symtoms.csv')
precautions = pd.read_csv('datasets/precautions.csv')
workouts = pd.read_csv('datasets/workout.csv')
description = pd.read_csv('datasets/description.csv')
medication = pd.read_csv('datasets/medications.csv')
diets = pd.read_csv('datasets/diets.csv')

In [31]:
description.head()

Unnamed: 0,Disease,Description
0,Fungal infection,Fungal infection is a common skin condition ca...
1,Allergy,Allergy is an immune system reaction to a subs...
2,GERD,GERD (Gastroesophageal Reflux Disease) is a di...
3,Chronic cholestasis,Chronic cholestasis is a condition where bile ...
4,Drug Reaction,Drug Reaction occurs when the body reacts adve...


In [32]:
precautions.head()

Unnamed: 0.1,Unnamed: 0,Disease,Precaution_1,Precaution_2,Precaution_3,Precaution_4
0,0,Drug Reaction,stop irritation,consult nearest hospital,stop taking drug,follow up
1,1,Malaria,Consult nearest hospital,avoid oily food,avoid non veg food,keep mosquitos out
2,2,Allergy,apply calamine,cover area with bandage,,use ice to compress itching
3,3,Hypothyroidism,reduce stress,exercise,eat healthy,get proper sleep
4,4,Psoriasis,wash hands with warm soapy water,stop bleeding using pressure,consult doctor,salt baths


In [33]:
medication.head()

Unnamed: 0,Disease,Medication
0,Fungal infection,"['Antifungal Cream', 'Fluconazole', 'Terbinafi..."
1,Allergy,"['Antihistamines', 'Decongestants', 'Epinephri..."
2,GERD,"['Proton Pump Inhibitors (PPIs)', 'H2 Blockers..."
3,Chronic cholestasis,"['Ursodeoxycholic acid', 'Cholestyramine', 'Me..."
4,Drug Reaction,"['Antihistamines', 'Epinephrine', 'Corticoster..."


In [34]:
diets.head()

Unnamed: 0,Disease,Diet
0,Fungal infection,"['Antifungal Diet', 'Probiotics', 'Garlic', 'C..."
1,Allergy,"['Elimination Diet', 'Omega-3-rich foods', 'Vi..."
2,GERD,"['Low-Acid Diet', 'Fiber-rich foods', 'Ginger'..."
3,Chronic cholestasis,"['Low-Fat Diet', 'High-Fiber Diet', 'Lean prot..."
4,Drug Reaction,"['Antihistamine Diet', 'Omega-3-rich foods', '..."


In [35]:
workouts.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,disease,workout
0,0,0,Fungal infection,Avoid sugary foods
1,1,1,Fungal infection,Consume probiotics
2,2,2,Fungal infection,Increase intake of garlic
3,3,3,Fungal infection,Include yogurt in diet
4,4,4,Fungal infection,Limit processed foods


In [13]:
#========== Helper Function =============
def helper(predDisease):
    description_ = description[description['Disease'] == predDisease]['Description']
    description_ = " ".join([w for w in description_ ])

    precautions_ = precautions[precautions['Disease'] == predDisease][['Precaution_1','Precaution_2','Precaution_3','Precaution_4']]
    precautions_ = [p for p in precautions_.values ]

    medication_ = medication[medication['Disease'] == predDisease]['Medication']
    medication_ = [m for m in medication_.values]

    diet_ = diets[diets['Disease'] == predDisease]['Diet']
    diet_ = [d for d in diet_.values]
    
    workout_ = workouts[workouts['disease'] == predDisease]['workout']

    return description_, precautions_, medication_, diet_, workout_


symptoms_dict = {'itching': 0, 'skin_rash': 1, 'nodal_skin_eruptions': 2, 'continuous_sneezing': 3, 'shivering': 4, 'chills': 5, 'joint_pain': 6, 'stomach_pain': 7, 'acidity': 8, 'ulcers_on_tongue': 9, 'muscle_wasting': 10, 'vomiting': 11, 'burning_micturition': 12, 'spotting_ urination': 13, 'fatigue': 14, 'weight_gain': 15, 'anxiety': 16, 'cold_hands_and_feets': 17, 'mood_swings': 18, 'weight_loss': 19, 'restlessness': 20, 'lethargy': 21, 'patches_in_throat': 22, 'irregular_sugar_level': 23, 'cough': 24, 'high_fever': 25, 'sunken_eyes': 26, 'breathlessness': 27, 'sweating': 28, 'dehydration': 29, 'indigestion': 30, 'headache': 31, 'yellowish_skin': 32, 'dark_urine': 33, 'nausea': 34, 'loss_of_appetite': 35, 'pain_behind_the_eyes': 36, 'back_pain': 37, 'constipation': 38, 'abdominal_pain': 39, 'diarrhoea': 40, 'mild_fever': 41, 'yellow_urine': 42, 'yellowing_of_eyes': 43, 'acute_liver_failure': 44, 'fluid_overload': 45, 'swelling_of_stomach': 46, 'swelled_lymph_nodes': 47, 'malaise': 48, 'blurred_and_distorted_vision': 49, 'phlegm': 50, 'throat_irritation': 51, 'redness_of_eyes': 52, 'sinus_pressure': 53, 'runny_nose': 54, 'congestion': 55, 'chest_pain': 56, 'weakness_in_limbs': 57, 'fast_heart_rate': 58, 'pain_during_bowel_movements': 59, 'pain_in_anal_region': 60, 'bloody_stool': 61, 'irritation_in_anus': 62, 'neck_pain': 63, 'dizziness': 64, 'cramps': 65, 'bruising': 66, 'obesity': 67, 'swollen_legs': 68, 'swollen_blood_vessels': 69, 'puffy_face_and_eyes': 70, 'enlarged_thyroid': 71, 'brittle_nails': 72, 'swollen_extremeties': 73, 'excessive_hunger': 74, 'extra_marital_contacts': 75, 'drying_and_tingling_lips': 76, 'slurred_speech': 77, 'knee_pain': 78, 'hip_joint_pain': 79, 'muscle_weakness': 80, 'stiff_neck': 81, 'swelling_joints': 82, 'movement_stiffness': 83, 'spinning_movements': 84, 'loss_of_balance': 85, 'unsteadiness': 86, 'weakness_of_one_body_side': 87, 'loss_of_smell': 88, 'bladder_discomfort': 89, 'foul_smell_of urine': 90, 'continuous_feel_of_urine': 91, 'passage_of_gases': 92, 'internal_itching': 93, 'toxic_look_(typhos)': 94, 'depression': 95, 'irritability': 96, 'muscle_pain': 97, 'altered_sensorium': 98, 'red_spots_over_body': 99, 'belly_pain': 100, 'abnormal_menstruation': 101, 'dischromic _patches': 102, 'watering_from_eyes': 103, 'increased_appetite': 104, 'polyuria': 105, 'family_history': 106, 'mucoid_sputum': 107, 'rusty_sputum': 108, 'lack_of_concentration': 109, 'visual_disturbances': 110, 'receiving_blood_transfusion': 111, 'receiving_unsterile_injections': 112, 'coma': 113, 'stomach_bleeding': 114, 'distention_of_abdomen': 115, 'history_of_alcohol_consumption': 116, 'fluid_overload.1': 117, 'blood_in_sputum': 118, 'prominent_veins_on_calf': 119, 'palpitations': 120, 'painful_walking': 121, 'pus_filled_pimples': 122, 'blackheads': 123, 'scurring': 124, 'skin_peeling': 125, 'silver_like_dusting': 126, 'small_dents_in_nails': 127, 'inflammatory_nails': 128, 'blister': 129, 'red_sore_around_nose': 130, 'yellow_crust_ooze': 131}
diseases_list = {15: 'Fungal infection', 4: 'Allergy', 16: 'GERD', 9: 'Chronic cholestasis', 14: 'Drug Reaction', 33: 'Peptic ulcer diseae', 1: 'AIDS', 12: 'Diabetes ', 17: 'Gastroenteritis', 6: 'Bronchial Asthma', 23: 'Hypertension ', 30: 'Migraine', 7: 'Cervical spondylosis', 32: 'Paralysis (brain hemorrhage)', 28: 'Jaundice', 29: 'Malaria', 8: 'Chicken pox', 11: 'Dengue', 37: 'Typhoid', 40: 'hepatitis A', 19: 'Hepatitis B', 20: 'Hepatitis C', 21: 'Hepatitis D', 22: 'Hepatitis E', 3: 'Alcoholic hepatitis', 36: 'Tuberculosis', 10: 'Common Cold', 34: 'Pneumonia', 13: 'Dimorphic hemmorhoids(piles)', 18: 'Heart attack', 39: 'Varicose veins', 26: 'Hypothyroidism', 24: 'Hyperthyroidism', 25: 'Hypoglycemia', 31: 'Osteoarthristis', 5: 'Arthritis', 0: '(vertigo) Paroymsal  Positional Vertigo', 2: 'Acne', 38: 'Urinary tract infection', 35: 'Psoriasis', 27: 'Impetigo'}

def get_predicted_value(patient_symptoms):
    input_vector = np.zeros(len(symptoms_dict))
    for item in patient_symptoms:
        input_vector[symptoms_dict[item]] = 1

    return diseases_list[svc.predict([input_vector])[0]]

In [14]:
#==== Test ====
symptoms = input("Enter symptoms of the patient:    ")
user_symptoms = [s.strip() for s in symptoms.split(',')]
user_symptoms = [sym.strip("[] ' ") for sym in user_symptoms]

predicted_diseases = get_predicted_value(user_symptoms)

description_, precautions_,  medication_, diet_, workout_ = helper(predicted_diseases)

Enter symptoms of the patient:     shivering,joint_pain,muscle_wasting




In [31]:
#Print Results

print("-------------- Predicted Disease -------------")
print(predicted_diseases)
print("------------------ Description ----------------")
print(description_)
print("------------------ Precautions ----------------")
i = 1
for pre in precautions_[0]:
    print(i,': ',pre)
    i += 1
print("---------------- Medications ------------------")
j = 1
for med in medication_:
    print(j," : ", med)
    j += 1
print("-------------------- Diet ----------------------")
k = 1
for die in diet_:
    print(k," : ", die)
    k += 1
print("------------------ Workouts -------------------")
l = 1
for work in workout_:
    print(l," : ", work)
    l += 1

-------------- Predicted Disease -------------
Allergy
------------------ Description ----------------
Allergy is an immune system reaction to a substance in the environment.
------------------ Precautions ----------------
1 :  apply calamine
2 :  cover area with bandage
3 :  nan
4 :  use ice to compress itching
---------------- Medications ------------------
1  :  ['Antihistamines', 'Decongestants', 'Epinephrine', 'Corticosteroids', 'Immunotherapy']
-------------------- Diet ----------------------
1  :  ['Elimination Diet', 'Omega-3-rich foods', 'Vitamin C-rich foods', 'Quercetin-rich foods', 'Probiotics']
------------------ Workouts -------------------
1  :  Avoid allergenic foods
2  :  Consume anti-inflammatory foods
3  :  Include omega-3 fatty acids
4  :  Stay hydrated
5  :  Eat foods rich in vitamin C
6  :  Include quercetin-rich foods
7  :  Consume local honey
8  :  Limit processed foods
9  :  Include ginger in diet
10  :  Avoid artificial additives
