In [53]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import ast

In [7]:
data = pd.read_csv("datasets/Training.csv")
testing_data = pd.read_csv("datasets/Testing.csv")

In [9]:
data.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis,Unnamed: 133
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,


In [11]:
data = data.drop("Unnamed: 133", axis=1)


In [13]:
X_train = data.drop('prognosis', axis=1)  # Assuming 'prognosis' is the target variable
y_train = data['prognosis']

In [15]:
LabelEncoder
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)

# Standardize numerical features (if applicable)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

In [17]:
# Preprocess the testing data (using the same scaler and encoder)
X_test = testing_data.drop('prognosis', axis=1)
y_test = testing_data['prognosis']

X_test_scaled = scaler.transform(X_test)
y_test_encoded = label_encoder.transform(y_test)


In [19]:
X_test_scaled

array([[ 2.50132708,  2.29336913,  6.6749948 , ..., -0.15401412,
        -0.15401412, -0.15401412],
       [-0.39978778, -0.4360397 , -0.14981285, ..., -0.15401412,
        -0.15401412, -0.15401412],
       [-0.39978778, -0.4360397 , -0.14981285, ..., -0.15401412,
        -0.15401412, -0.15401412],
       ...,
       [-0.39978778,  2.29336913, -0.14981285, ..., -0.15401412,
        -0.15401412, -0.15401412],
       [-0.39978778,  2.29336913, -0.14981285, ...,  6.49291111,
         6.49291111,  6.49291111],
       [ 2.50132708,  2.29336913, -0.14981285, ..., -0.15401412,
         6.49291111, -0.15401412]])

In [21]:
models = {
    "KNeighbors": KNeighborsClassifier(n_neighbors=5),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC(),
    "Neural Network": MLPClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(),
}

results = {}
for model_name, model in models.items():
    model.fit(X_train_scaled, y_train_encoded)
    y_pred_encoded = model.predict(testing_data.drop('prognosis', axis=1))
    y_pred = label_encoder.inverse_transform(y_pred_encoded)

    accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred, average='micro')
    f1 = f1_score(y_test, y_pred, average='micro')

    results[model_name] = {'accuracy': accuracy, 'recall': recall, 'f1': f1}

print("model trained succesfully!")






model trained succesfully!




In [23]:
# Print the accuracy, recall, and F1-score for each model
for model_name, metrics in results.items():
    print(f"{model_name}:")
    print(f"  Accuracy: {metrics['accuracy']:.4f}")
    print(f"  Recall: {metrics['recall']:.4f}")
    print(f"  F1-score: {metrics['f1']:.4f}")

KNeighbors:
  Accuracy: 0.1429
  Recall: 0.1429
  F1-score: 0.1429
Decision Tree:
  Accuracy: 0.2143
  Recall: 0.2143
  F1-score: 0.2143
Random Forest:
  Accuracy: 0.4048
  Recall: 0.4048
  F1-score: 0.4048
SVM:
  Accuracy: 0.2143
  Recall: 0.2143
  F1-score: 0.2143
Neural Network:
  Accuracy: 0.9762
  Recall: 0.9762
  F1-score: 0.9762
Gradient Boosting:
  Accuracy: 0.0714
  Recall: 0.0714
  F1-score: 0.0714


In [25]:
# Find Best Model
best_model_name = None
best_accuracy = 0
for model_name, results in results.items():
  accuracy = results['accuracy']
  if accuracy > best_accuracy:
    best_model_name = model_name
    best_accuracy = accuracy

print(f"Best Model by Accuracy: {best_model_name}")

# Train Best Model on All Training Data
best_model = models[best_model_name]
best_model.fit(X_train_scaled, y_train_encoded)

# Final Evaluation (Optional)
y_pred_encoded = best_model.predict(X_test_scaled)
y_pred = label_encoder.inverse_transform(y_pred_encoded)

accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred, average='micro')
f1 = f1_score(y_test, y_pred, average='micro')

print(f"Final Performance of {best_model_name}:")
print(f"\tAccuracy: {accuracy}")
print(f"")

MLP = best_model

Best Model by Accuracy: Neural Network
Final Performance of Neural Network:
	Accuracy: 0.9761904761904762



In [29]:
# ... (Your existing code for finding the best model and training it)

# Save the final model with pickle
import pickle

with open('model/MLP.pkl', 'wb') as f:
    pickle.dump(MLP, f)

print("Best model saved successfully!")

Best model saved successfully!


In [31]:
le = LabelEncoder()
le.fit(y_test)
y_test_encoded = le.transform(y_test)

In [35]:
# Load the saved model
with open('model/MLP.pkl', 'rb') as f:
    best_model = pickle.load(f)

print("predicted disease :",MLP.predict(X_test.iloc[0].values.reshape(1,-1)))
print("Actual Disease :", y_test_encoded[0])

predicted disease : [15]
Actual Disease : 15


In [39]:
medications = pd.read_csv('datasets/medications.csv')
symptoms_df = pd.read_csv('datasets/symptoms.csv')
diseases_df = pd.read_csv('datasets/diseases.csv')

In [45]:
def helper(dis):

    med = medications[medications['Disease'] == dis]['Medication']
    med = [med for med in med.values]
    return ast.literal_eval(med[0])

    return med

In [47]:
symptoms_dict = pd.Series(symptoms_df.Index.values, index=symptoms_df.Symptom).to_dict()
diseases_list = pd.Series(diseases_df.Disease.values, index=diseases_df.Index).to_dict()

# Model Prediction function
def get_predicted_value(patient_symptoms):
    input_vector = np.zeros(len(symptoms_dict))
    for item in patient_symptoms:
        input_vector[symptoms_dict[item]] = 1
    return diseases_list[MLP.predict([input_vector])[0]]

In [55]:
# Split the user's input into a list of symptoms (assuming they are comma-separated) # itching,skin_rash,nodal_skin_eruptions
symptoms = input("Enter your symptoms.......")
user_symptoms = [s.strip() for s in symptoms.split(',')]
# Remove any extra characters, if any
user_symptoms = [symptom.strip("[]' ") for symptom in user_symptoms]
predicted_disease = get_predicted_value(user_symptoms)

med = helper(predicted_disease)

print("=================predicted disease============")
print(predicted_disease)
print("=================medications==================")
i = 1
for m_i in med:
    print(i, ": ", m_i)
    i += 1

Enter your symptoms....... itching


Chronic cholestasis
1 :  Ursodeoxycholic acid
2 :  Cholestyramine
3 :  Methotrexate
4 :  Corticosteroids
5 :  Liver transplant


In [None]:
import sklearn
print(sklearn.__version__)

1.5.2
