In [None]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix


In [None]:
from sklearn.svm import SVC

In [None]:
path = '../data/Training.csv'
df = pd.read_csv(path)

In [None]:
df.shape

In [None]:
df.sample(5)

In [None]:
len(df['prognosis'].unique())

# Splitting the data

In [None]:
dataset = df

In [None]:
X = dataset.drop('prognosis', axis = 1)
y = dataset['prognosis']

In [None]:
# encode y, which is string
le = LabelEncoder()
le.fit(y)
y = le.transform(y)

In [None]:
X_train, X_test, y_train , y_test = train_test_split(X, y, test_size = 0.3 , random_state = 20)

In [None]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape



# Train some ML models

In [None]:
# create a dictionary to store models
models = {
    'SVC': SVC(kernel = 'linear'),
    'RandomForest': RandomForestClassifier(n_estimators = 100, random_state = 42),
    'KNeighbors' : KNeighborsClassifier(n_neighbors = 5),
    'MultinomialNB' : MultinomialNB(),
    'GradientBoosting' : GradientBoostingClassifier(n_estimators = 100, random_state = 42)
}

In [None]:
for model, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test,y_pred) # accuracy 

    cm = confusion_matrix (y_test, y_pred) # confusiion matrix

    print(f'{model} accuracy: {accuracy}')
    print(f"{model} Confusion matrix: ")
    print(np.array2string(cm, separator = ' '))


# all models have good performance


# choose one model : SVC

In [None]:
svc = SVC(kernel = 'linear')
svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)
accuracy = accuracy_score(y_pred, y_test)
accuracy

# saving the model

In [None]:
import pickle 
pickle.dump(svc, open('../models/svc.pkl', 'wb'))

In [None]:
# Load the saved model 
svc = pickle.load(open('../models/svc.pkl', 'rb'))

In [None]:
# test nbr 2
print('Predicted label:' , svc.predict(X_test.iloc[0].values.reshape(1,-1)))
print('actual disease:' , y_test[0])
    

In [None]:
y_test

In [None]:
# test nbr 2
print('Predicted label:' , svc.predict(X_test.iloc[12].values.reshape(1,-1)))
print('actual disease:' , y_test[12])


# Load database and use logic for recommendations

In [None]:
syms = pd.read_csv('../data/symtoms_df.csv')
precautions = pd.read_csv('../data/precautions_df.csv')
workout = pd.read_csv('../data/workout_df.csv')
description = pd.read_csv('../data/description.csv')
medications = pd.read_csv('../data/medications.csv')
diets = pd.read_csv('../data/diets.csv')

In [None]:
df.shape

In [None]:
# Create dictionary with symptoms as keys and index as values
# Get all columns except 'prognosis'
unique_symptoms = [col for col in dataset.columns if col != 'prognosis']
symptoms_dict = {symptom: i for i, symptom in enumerate(unique_symptoms)}
print(symptoms_dict)

In [None]:
# create a dictionary of individual disease
unique_disease = syms.Disease.unique()
diseases_list = {disease: i for i, disease in enumerate(unique_disease)}


def create_diseases_list(diseases_list):
    # Create a new dictionary to store the formatted output
    formatted_list = {nbr: disease for nbr, disease in diseases_list.items()}
    return formatted_list
diseases_list = create_diseases_list(diseases_list)


In [256]:
def helper (dis):
    descr = description[description['Disease'] == dis]['Description']
    descr = ' '.join(w for w in descr)

    prec = precautions[precautions['Disease'] == dis][['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']]
    prec = [i for i in prec.values]

    med = medications[medications['Disease'] == dis]['Medication']
    med = ' '.join (m for m in med.values)

    die = diets[diets['Disease'] == dis]['Diet']
    die = [d for d in die.values]

    work = workout[workout['disease'] == dis]['workout']

    return descr, prec, med, die, work

In [254]:
#model prediction function
def get_predicted_value(patient_symptoms):
    input_vector = np.zeros(len(symptoms_dict)) ## an inout vector which holds zeros 


    for item in patient_symptoms:
        input_vector[symptoms_dict[item]] = 1
        svc.predict([input_vector])[0]

    return formatted_list[svc.predict([input_vector])[0]]

In [260]:
symptoms = input('enter symptoms:')
user_symptoms = [s.strip() for s in symptoms.split(',')]
user_symptoms = [sym.strip('[]') for sym in user_symptoms]
predicted_disease = get_predicted_value(user_symptoms)
descr, prec, med, die, work = helper(predicted_disease)

enter symptoms: back_pain




[array(['have balanced diet', 'exercise', 'consult doctor', 'follow up'],
       dtype=object)]

In [266]:
print(predicted_disease)

Diabetes 


In [268]:
print(med)




In [None]:
descr = description[description['Disease'] == predicted_disease]['Description']
descr = ' '.join(w for w in descr)
descr

In [248]:
prec = precautions[precautions['Disease'] == predicted_disease][['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']]
prec = [i for i in prec.values]
prec 

[array(['stop irritation', 'consult nearest hospital', 'stop taking drug',
        'follow up'], dtype=object)]

In [249]:
prec = ' '.join(str(p) for p in prec)
prec

"['stop irritation' 'consult nearest hospital' 'stop taking drug'\n 'follow up']"

In [None]:
# Extract unique diseases and create a dictionary with indices starting from 0
unique_disease = syms.Disease.unique()
diseases_list = {i: disease for i, disease in enumerate(unique_disease)}

# Output each number with the corresponding disease
for nbr, disease in diseases_list.items():
    print(f"{nbr} : '{disease}'")


In [None]:
formatted_list[38]

In [None]:
mild_fever, yellow_urine, yellowing_of_eyes, acute_liver_failure, fluid_overload, swelling_of_stomach

In [250]:
workout

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,disease,workout
0,0,0,Fungal infection,Avoid sugary foods
1,1,1,Fungal infection,Consume probiotics
2,2,2,Fungal infection,Increase intake of garlic
3,3,3,Fungal infection,Include yogurt in diet
4,4,4,Fungal infection,Limit processed foods
...,...,...,...,...
405,405,405,Impetigo,Consult a healthcare professional
406,406,406,Impetigo,Follow medical recommendations
407,407,407,Impetigo,Avoid scratching
408,408,408,Impetigo,Take prescribed antibiotics
