In [2]:
import numpy as np
import pandas as pd
import pickle 
from scipy.stats import mode
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

# Reading the train.csv by removing the 
# last column since it's an empty column
DATA_PATH = "Discount\disease\Training.csv"
data = pd.read_csv(DATA_PATH).dropna(axis = 1)

# Checking whether the dataset is balanced or not
disease_counts = data["prognosis"].value_counts()
temp_df = pd.DataFrame({
	"Disease": disease_counts.index,
	"Counts": disease_counts.values
})

# Encoding the target value into numerical
# value using LabelEncoder
encoder = LabelEncoder()
data["prognosis"] = encoder.fit_transform(data["prognosis"])



X = data.iloc[:,:-1]
y = data.iloc[:, -1]
X_train, X_test, y_train, y_test =train_test_split(
X, y, test_size = 0.2, random_state = 24)



# Initializing Models
models = {
	"SVC":SVC(),
	"Gaussian NB":GaussianNB(),
	"Random Forest":RandomForestClassifier(random_state=18)
}

# Training and testing SVM Classifier
svm_model = SVC()
svm_model.fit(X_train, y_train)
preds = svm_model.predict(X_test)

# Training and testing Naive Bayes Classifier
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)
preds = nb_model.predict(X_test)

# Training and testing Random Forest Classifier
rf_model = RandomForestClassifier(random_state=18)
rf_model.fit(X_train, y_train)
preds = rf_model.predict(X_test)



# Training the models on whole data
final_svm_model = SVC()
final_nb_model = GaussianNB()
final_rf_model = RandomForestClassifier(random_state=18)
final_svm_model.fit(X, y)
final_nb_model.fit(X, y)
final_rf_model.fit(X, y)



# Reading the test data
test_data = pd.read_csv("Discount\disease\Testing.csv").dropna(axis=1)

test_X = test_data.iloc[:, :-1]
test_Y = encoder.transform(test_data.iloc[:, -1])

# Making prediction by take mode of predictions 
# made by all the classifiers
svm_preds = final_svm_model.predict(test_X)
nb_preds = final_nb_model.predict(test_X)
rf_preds = final_rf_model.predict(test_X)

final_preds = [mode([i,j,k])[0][0] for i,j,
			k in zip(svm_preds, nb_preds, rf_preds)]

symptoms = X.columns.values

# Creating a symptom index dictionary to encode the
# input symptoms into numerical form
symptom_index = {}
for index, value in enumerate(symptoms):
	symptom = " ".join([i.capitalize() for i in value.split("_")])
	symptom_index[symptom] = index

data_dict = {
	"symptom_index":symptom_index,
	"predictions_classes":encoder.classes_
}





IndexError: invalid index to scalar variable.

In [35]:
import numpy as np
import pandas as pd
import pickle 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

def predictDisease(symptoms):

    # Reading the train.csv by removing the 
    # last column since it's an empty column
    DATA_PATH = r"C:\Users\ajayk\OneDrive\Documents\GitHub\Disease_Predictor\Discount\disease\Training.csv"
    data = pd.read_csv(DATA_PATH).dropna(axis=1)

    # Encoding the target value into numerical
    # value using LabelEncoder
    encoder = LabelEncoder()
    data["prognosis"] = encoder.fit_transform(data["prognosis"])

    X = data.iloc[:, :-1]
  

    final_rf_model = pickle.load(open(r"C:\Users\ajayk\OneDrive\Documents\GitHub\Disease_Predictor\Discount\models\final_rf_model.pkl", "rb"))
    final_nb_model = pickle.load(open(r"C:\Users\ajayk\OneDrive\Documents\GitHub\Disease_Predictor\Discount\models\final_nb_model.pkl", "rb"))
    final_svm_model = pickle.load(open(r"C:\Users\ajayk\OneDrive\Documents\GitHub\Disease_Predictor\Discount\models\final_svm_model.pkl", "rb"))

    # Creating a symptom index dictionary to encode the
    # input symptoms into numerical form
    symptoms = symptoms.split(",")
    symptom_index = {symptom.strip(): i for i, symptom in enumerate(X.columns)}

    data_dict = {
        "symptom_index": symptom_index,
        "predictions_classes": encoder.classes_
    }

    # Creating input data for the model
    input_data = np.zeros(len(data_dict["symptom_index"]))
    for symptom in symptoms:
        index = data_dict["symptom_index"].get(symptom.strip())
        if index is not None:
            input_data[index] = 1

    # Reshaping the input data and converting it
    # into suitable format for model predictions
    input_data = np.array(input_data).reshape(1,-1)

    # Generating individual outputs
    rf_prediction = encoder.inverse_transform([final_rf_model.predict(input_data)])[0]
    nb_prediction = encoder.inverse_transform([final_nb_model.predict(input_data)])[0]
    svm_prediction = encoder.inverse_transform([final_svm_model.predict(input_data)])[0]

    # Making final prediction by taking mode of all predictions
    all_predictions = [rf_prediction, nb_prediction, svm_prediction]
    final_prediction = np.unique(all_predictions)[0]

    predictions = {
        "rf_model_prediction": rf_prediction,
        "naive_bayes_prediction": nb_prediction,
        "svm_model_prediction": svm_prediction,
        "final_prediction": final_prediction
    }
    return (predictions)

predictDisease("Itching,Skin Rash,Nodal Skin Eruptions")





https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


{'rf_model_prediction': 'Arthritis',
 'naive_bayes_prediction': 'Allergy',
 'svm_model_prediction': 'AIDS',
 'final_prediction': 'AIDS'}

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


{'rf_model_prediction': 'Arthritis',
 'naive_bayes_prediction': 'Allergy',
 'svm_model_prediction': 'AIDS',
 'final_prediction': 'Allergy'}