## This module trains the classifiers once and saves the fitted models and their cross-validation scores to disk, so they can be loaded later without retraining overhead

In [1]:
# Silence every Python warning so the notebook output below stays uncluttered.
# NOTE(review): a blanket "ignore" filter also hides any warnings raised by the
# libraries used later in this file -- consider narrowing it by category.

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Import all necessary packages

import joblib
import pandas as pd
from statistics import mean
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

In [3]:
# Read the two disease/symptom tables: one covering all possible symptom
# combinations, and one listing each individual disease with its own symptoms.

df_combination = pd.read_csv("./Disease_Symptom_Dataset_For_All_Symptom_Subsets.csv")
df_independent = pd.read_csv("./Disease_Symptom_Dataset_For_Respective_Symptoms.csv")

# Column 0 becomes the target (Y); every remaining column becomes a feature (X).
# The target is taken with a slice, so it stays a single-column DataFrame.
X_combination, Y_combination = df_combination.iloc[:, 1:], df_combination.iloc[:, :1]
X_independent, Y_independent = df_independent.iloc[:, 1:], df_independent.iloc[:, :1]


In [4]:
# Train each classifier on the all-symptom-combinations dataset, save the
# fitted model with joblib, then save its mean 5-fold cross-validation score.


def _train_and_persist(classifier, features, labels, model_file, score_file, folds=5):
    """Fit a classifier, save it, cross-validate it and save the mean score.

    Args:
        classifier: Unfitted scikit-learn estimator.
        features: Feature matrix passed to ``fit`` and ``cross_val_score``.
        labels: 1-D target vector aligned with ``features``.
        model_file: Path the fitted estimator is dumped to.
        score_file: Path the mean cross-validation score is dumped to.
        folds: Number of cross-validation folds (``cv`` argument).

    Returns:
        Tuple ``(fitted_classifier, fold_scores, mean_score)``.
    """
    classifier = classifier.fit(features, labels)
    # The model is written before cross-validation runs, so the saved file
    # always holds the estimator fitted on the full dataset.
    joblib.dump(classifier, model_file)

    fold_scores = cross_val_score(classifier, features, labels, cv=folds)
    mean_score = mean(fold_scores)
    joblib.dump(mean_score, score_file)
    return classifier, fold_scores, mean_score


# Y_combination is a single-column DataFrame; scikit-learn classifiers expect a
# 1-D label vector, so flatten it once instead of relying on implicit
# conversion (whose warning is silenced earlier in this file).
labels_1d = Y_combination.values.ravel()


# Create Logistic Regression Classifier & fit the data to it
print("Processing with Logistic Regression...")
lr_classifier, lr_scores, lr_mean_score = _train_and_persist(
    LogisticRegression(),
    X_combination,
    labels_1d,
    'log_reg.sav',
    'log_reg_cv.sav',
)


# Create Random Forest Classifier & fit the data to it
print("Processing with Random Forest Classifier...")
rf_classifier, rf_scores, rf_mean_score = _train_and_persist(
    RandomForestClassifier(n_estimators=10, criterion='entropy'),
    X_combination,
    labels_1d,
    'rand_forest.sav',
    'rand_forest_cv.sav',
)


# Create KNN Classifier & fit the data to it
print("Processing with KNN Classifier...")
knn_classifier, knn_scores, knn_mean_score = _train_and_persist(
    KNeighborsClassifier(n_neighbors=7, weights='distance', n_jobs=4),
    X_combination,
    labels_1d,
    'knn.sav',
    'knn_cv.sav',
)


# Create Multinomial Naive Bayes Classifier & fit the data to it
print("Processing with Multinomial Naive Bayes...")
mnb_classifier, mnb_scores, mnb_mean_score = _train_and_persist(
    MultinomialNB(),
    X_combination,
    labels_1d,
    'mnb.sav',
    'mnb_cv.sav',
)


print("Saved model weights successfully !!")


Processing with Logistic Regression...
Processing with Random Forest Classifier...
Processing with KNN Classifier...
Processing with Multinomial Naive Bayes...
Saved model weights successfully !!
