In [None]:
# Importing necessary libraries
import joblib
import csv
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [None]:

# Loading the dataset
# Column layout assumed by this notebook: the first N_SYMPTOMS columns hold the
# symptom features and the column immediately after them holds the disease label.
N_SYMPTOMS = 132
data = pd.read_csv("Training.csv")
if data.shape[1] <= N_SYMPTOMS:
    # Fail with a readable message instead of an opaque positional IndexError below.
    raise ValueError(
        "Training.csv has {} columns; expected at least {} "
        "({} symptom columns followed by the target column)".format(
            data.shape[1], N_SYMPTOMS + 1, N_SYMPTOMS
        )
    )
# `df` is kept under its own name because the inference cell reads the column names from it.
df = pd.DataFrame(data)
# Separating features (symptoms) and target (disease)
X = data.iloc[:, :N_SYMPTOMS].values
y = data.iloc[:, N_SYMPTOMS].values
# Splitting the dataset into training (80%) and testing (20%) sets; the fixed
# random_state keeps the split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=35)

In [None]:
# Creating 4 different base classifiers
dt = DecisionTreeClassifier(random_state=35)
knn = KNeighborsClassifier(n_neighbors=5)
rf = RandomForestClassifier(n_estimators=100, random_state=35)
nb = GaussianNB()

# Fitting all 4 base classifiers on the training data.
# fit() already stores the sorted unique training labels in `classes_`, so the
# attribute is not overwritten by hand afterwards.
for base_clf in (dt, knn, rf, nb):
    base_clf.fit(X_train, y_train)

# The probability matrices are averaged element-wise in a later cell, which is only
# meaningful when every model orders its classes identically. All four were fitted
# on the same y_train, so this must hold; assert it to catch accidental changes.
assert all(np.array_equal(clf.classes_, dt.classes_) for clf in (knn, rf, nb)), \
    "base classifiers disagree on class order"

# Predicting the probabilities of the classes of the testing data using all 4 base classifiers
dt_pred = dt.predict_proba(X_test)
knn_pred = knn.predict_proba(X_test)
rf_pred = rf.predict_proba(X_test)
nb_pred = nb.predict_proba(X_test)

In [None]:

# Combining base model predictions using stacking ensemble method.
# Fitting the stack sets `sc.classes_` itself, so it is not reassigned manually.
estimators = [('dt', dt), ('knn', knn), ('rf', rf), ('nb', nb)]
sc = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())
sc.fit(X_train, y_train)
sc_pred = sc.predict_proba(X_test)

# Taking the average of the probabilities of the classes for each classifier as the final prediction
hybrid_proba = (dt_pred + knn_pred + rf_pred + nb_pred + sc_pred) / 5

# Converting probabilities to class labels: take the column index of the highest
# averaged probability per row (first maximum wins on ties) and look it up in classes_.
hybrid_idx = np.argmax(hybrid_proba, axis=1)
hybrid_pred = list(sc.classes_[hybrid_idx])

# Calculating the accuracy of the hybrid model on the held-out test split
accuracy = accuracy_score(y_test, hybrid_pred)
print("Accuracy of the hybrid model:", accuracy)



In [None]:
# Persist the fitted stacking classifier `sc` to disk; the next cell loads it back
# from the same path.
filename = 'hybrid_model.joblib'
joblib.dump(value=sc, filename=filename)

In [None]:
# Load the trained hybrid model.
# NOTE: joblib files are pickle-based, so only load artifacts from a trusted source
# (here: the file written by the save cell of this notebook).
hybrid_model = joblib.load('hybrid_model.joblib')

# Map each symptom name to its feature position. The feature count is taken from the
# fitted model instead of being hard-coded, and the names are the leading columns of
# the training frame, i.e. the columns that were sliced off as X for training.
n_features = hybrid_model.n_features_in_
symptoms = df.columns.values[:n_features]
indices = list(range(n_features))
dictionary = dict(zip(symptoms, indices))

symptom = ['blood_in_sputum', 'swelled_lymph_nodes', 'puffy_face_and_eyes', 'mild_fever', 'skin_rash']
user_input_symptoms = symptom

# Report every unrecognised symptom at once rather than failing on the first
# dictionary lookup with a bare KeyError.
unknown_symptoms = [name for name in user_input_symptoms if name not in dictionary]
if unknown_symptoms:
    raise KeyError("Unknown symptom(s): {}".format(unknown_symptoms))

# Build the single-row feature vector: 1 for each reported symptom, 0 elsewhere.
user_input_label = np.zeros((1, n_features), dtype=int)
user_input_label[0, [dictionary[name] for name in user_input_symptoms]] = 1

# Prepare the input data
custom_input = user_input_label

# Predict the probabilities of the classes for the input data using all the base
# classifiers fitted inside the stack.
base_estimators = hybrid_model.named_estimators_
dt_prob = base_estimators['dt'].predict_proba(custom_input)
knn_prob = base_estimators['knn'].predict_proba(custom_input)
rf_prob = base_estimators['rf'].predict_proba(custom_input)
nb_prob = base_estimators['nb'].predict_proba(custom_input)
# Let the stacking classifier assemble its own meta-features instead of concatenating
# the base outputs by hand; this is the same call used when scoring the test split.
sc_prob = hybrid_model.predict_proba(custom_input)

# to get prediction on individual classifiers in the model.
# The estimator inside the stack returns a class index, which is looked up in the
# stack's classes_. Element 0 is taken explicitly because calling int() on a
# one-element array is deprecated in recent NumPy releases.
class_index = base_estimators['dt'].predict(custom_input)
class_name = hybrid_model.classes_[int(class_index[0])]
print(class_name)

# Take the average of the probabilities of the classes for each classifier as the final prediction
hybrid_prob = (dt_prob + knn_prob + rf_prob + nb_prob + sc_prob) / 5

# Convert the final probabilities to class labels by mapping the class labels to their
# corresponding values using the classes_ attribute of the stacking classifier
hybrid_pred = [hybrid_model.classes_[i] for i in hybrid_prob.argmax(axis=1)]

# Print the final disease prediction for the custom input
print("The final disease prediction is:", hybrid_pred[0])
