# Title: Personalized Medical Recommendation System

# Description:
> Welcome to the Personalized Medical Recommendation System, a powerful platform designed to assist users in understanding
> and managing their health. The system analyzes user-input symptoms to predict potential diseases accurately.

In [None]:
# load dataset and tools

In [3]:
import pandas as pd

In [5]:
# Load the training table from Training.csv (path is relative to the working directory).
# The `prognosis` column holds the disease label; it is separated from the feature columns further down.
dataset = pd.read_csv("Training.csv")

In [7]:
# Preview the first rows to sanity-check the columns that were loaded.
dataset.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection


In [9]:
# (rows, columns) of the loaded table.
dataset.shape

(4920, 133)

In [11]:
# Number of distinct disease labels; dropna=False counts a missing value (if any)
# as its own label, exactly like taking len() of .unique().
dataset['prognosis'].nunique(dropna=False)

41

In [13]:
# List the distinct label strings exactly as stored in the file.
# Note: in the recorded output some labels carry a trailing space (e.g. 'Diabetes ', 'Hypertension ').
dataset['prognosis'].unique()

array(['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis',
       'Drug Reaction', 'Peptic ulcer diseae', 'AIDS', 'Diabetes ',
       'Gastroenteritis', 'Bronchial Asthma', 'Hypertension ', 'Migraine',
       'Cervical spondylosis', 'Paralysis (brain hemorrhage)', 'Jaundice',
       'Malaria', 'Chicken pox', 'Dengue', 'Typhoid', 'hepatitis A',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Alcoholic hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia',
       'Dimorphic hemmorhoids(piles)', 'Heart attack', 'Varicose veins',
       'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia',
       'Osteoarthristis', 'Arthritis',
       '(vertigo) Paroymsal  Positional Vertigo', 'Acne',
       'Urinary tract infection', 'Psoriasis', 'Impetigo'], dtype=object)

# Train test Split

In [16]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder                                                                                                              

In [17]:
# Target: the disease label column.  Features: every remaining column.
y = dataset["prognosis"]
X = dataset.drop(columns="prognosis")

> Encoding the string labels as integers

In [21]:
# Map each distinct label string to an integer code in a single fit+transform step.
# The fitted encoder stays available as `le`; le.inverse_transform(codes) recovers the names.
# NOTE: `y` (string labels) and `Y` (integer codes) differ only by letter case.
le = LabelEncoder()
Y = le.fit_transform(y)

In [23]:
# Original string labels (left unchanged by the encoding step).
y

0                              Fungal infection
1                              Fungal infection
2                              Fungal infection
3                              Fungal infection
4                              Fungal infection
                         ...                   
4915    (vertigo) Paroymsal  Positional Vertigo
4916                                       Acne
4917                    Urinary tract infection
4918                                  Psoriasis
4919                                   Impetigo
Name: prognosis, Length: 4920, dtype: object

In [25]:
# Integer-encoded labels produced by the LabelEncoder above.
Y

array([15, 15, 15, ..., 38, 35, 27])

> Split the dataset into four sets: 70% of the rows of the input variables `X` go to `X_train` and the remaining 30%
> go to `X_test`. Similarly, the matching 70% of the encoded labels `Y` go to `y_train` and the remaining 30%
> go to `y_test`.
  

In [28]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=20,
)

In [30]:
# Confirm the split sizes: each feature matrix must have as many rows as its label vector.
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((3444, 132), (1476, 132), (3444,), (1476,))

# Training top models

In [None]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

# Candidate classifiers, keyed by the label used in the printed report.
# NOTE(review): the "GrandientBoosting" key is misspelled; it is left unchanged here
# because it is emitted verbatim in the report and may be looked up elsewhere.
models = {
    "SVC": SVC(kernel='linear'),
    "RandomForest": RandomForestClassifier(n_estimators=100, random_state=42),
    "GrandientBoosting": GradientBoostingClassifier(n_estimators=100, random_state=42),
    "KNeighbors": KNeighborsClassifier(n_neighbors=5),
    "MultinomialNB": MultinomialNB(),
}

# Fit every candidate on the training split, then score it on the held-out split.
for model_name, model in models.items():
    # fit() returns the estimator itself, so training and prediction can be chained.
    predictions = model.fit(X_train, y_train).predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    cm = confusion_matrix(y_test, predictions)

    # One report per model: accuracy line, header line, then the matrix itself.
    print(
        f"{model_name} accuracy : {accuracy}",
        f"{model_name} Confusion Matrix : ",
        np.array2string(cm, separator=" ,"),
        sep="\n",
    )

<h1> Single Prediction </h1>


In [41]:
# Refit a linear-kernel SVC on its own and report its accuracy on the held-out split.
svc = SVC(kernel='linear').fit(X_train, y_train)
ypred = svc.predict(X_test)
accuracy_score(y_test, ypred)

1.0

In [43]:
#saving model
import pickle
# pickle.dump(svc,open("models/svc.pkl","wb"))

In [45]:
# Load the persisted SVC from disk.
# - This rebinds `svc`, replacing the estimator fitted in the earlier cell.
# - models/svc.pkl must already exist: the pickle.dump line in the saving cell is commented out.
# - Only unpickle files you produced yourself; pickle.load can execute arbitrary code.
# The `with` block guarantees the file handle is closed even if unpickling fails.
with open("models/svc.pkl", "rb") as model_file:
    svc = pickle.load(model_file)

In [47]:
# The code below returns the encoded label (class index) of the disease predicted for a single record.
# Select the first test row as a one-row 2-D array, the input shape predict() expects.
first_record = X_test.iloc[[0]].to_numpy()
svc.predict(first_record)



array([40])

In [49]:
# True encoded label at position 0 of the test split; the model is correct for this
# record when this value equals the prediction made for X_test's row at position 0.
y_test[0]

40

In [55]:
# test 1: compare the predicted and the true encoded label for the test record at position 0
sample_pos = 0
sample_features = X_test.iloc[[sample_pos]].to_numpy()  # one-row 2-D array
print("Predicted Label: " ,svc.predict(sample_features))
print("Actual Label :" , y_test[sample_pos])


Predicted Label:  [40]
Actual Label : 40




In [57]:
# test 2: predict the disease for the full symptom record at position 10 of the
# test split and compare it with the true encoded label
sample_pos = 10
sample_features = X_test.iloc[[sample_pos]].to_numpy()  # one-row 2-D array
print("Predicted Label: " ,svc.predict(sample_features))
print("Actual Label :" , y_test[sample_pos])

Predicted Label:  [20]
Actual Label : 20




<h1>Recommendation System and prediction</h1>

<h2>load database and use logic for recommendations</h2>

In [81]:
# Load the lookup tables for the recommendation step; each one is a CSV under datasets/.
# NOTE(review): judging by the file names these hold symptoms, precautions, workouts,
# descriptions, medications and diets per disease — confirm against the actual columns.
# NOTE(review): the recorded outputs show 'Peptic ulcer diseae' among the training labels
# but 'Peptic ulcer disease' in diets.csv, so name-based lookups across these files may
# need normalising — verify.
sym_des = pd.read_csv('datasets/symtoms_df.csv')
precautions = pd.read_csv('datasets/precautions_df.csv')
workout = pd.read_csv('datasets/workout_df.csv')
description = pd.read_csv('datasets/description.csv')
medication = pd.read_csv('datasets/medications.csv')
diets = pd.read_csv('datasets/diets.csv')
                      
                      
  
                      
                      
# Remember: all the disease names are taken from the `prognosis` column.

In [85]:
# Display the diet lookup table (columns shown in the recorded output: Disease, Diet).
diets

Unnamed: 0,Disease,Diet
0,Fungal infection,"['Antifungal Diet', 'Probiotics', 'Garlic', 'C..."
1,Allergy,"['Elimination Diet', 'Omega-3-rich foods', 'Vi..."
2,GERD,"['Low-Acid Diet', 'Fiber-rich foods', 'Ginger'..."
3,Chronic cholestasis,"['Low-Fat Diet', 'High-Fiber Diet', 'Lean prot..."
4,Drug Reaction,"['Antihistamine Diet', 'Omega-3-rich foods', '..."
5,Peptic ulcer disease,"['Low-Acid Diet', 'Fiber-rich foods', 'Ginger'..."
6,AIDS,"['Balanced Diet', 'Protein-rich foods', 'Fruit..."
7,Diabetes,"['Low-Glycemic Diet', 'Fiber-rich foods', 'Lea..."
8,Gastroenteritis,"['Bland Diet', 'Bananas', 'Rice', 'Applesauce'..."
9,Bronchial Asthma,"['Anti-Inflammatory Diet', 'Omega-3-rich foods..."
