In [14]:
# Classifier comparison on the acute-inflammations diagnosis dataset: baseline
import math
import numpy as np
import pandas as pd
from sqlalchemy import create_engine
from sklearn.preprocessing import LabelEncoder
from joblib import dump, load
# Standard ML Models for comparison
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import accuracy_score, roc_auc_score, recall_score, precision_score, classification_report
# Splitting data into training/testing
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate
from sklearn.preprocessing import MinMaxScaler

#Classifiers
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis


# Metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, median_absolute_error

# Distributions
import scipy

In [5]:
# Read the discretized acute-inflammations table from the working directory
# and remember its column index for later cells.
df = pd.read_csv("./acute-inflammations-diagnosis-discretized.csv")
cols = df.columns
# Row shuffling is left switched off, so rows keep their CSV order.
#df = df.sample(frac=1)
# NOTE(review): presumably marks the task as binary; nothing in the visible
# cells reads this flag -- confirm before relying on it.
multiclass = False

In [3]:
# Split the table by position: every column except the last is a feature,
# the last column is the label.  Negative indexing takes the layout from
# `df` itself, so this cell does not depend on a separately computed `cols`
# that may have been built from a different version of the frame.
X = df.iloc[:, :-1]
Y = df.iloc[:, -1]

In [34]:
def model_report(model_name, model, model_scores):
    """Bundle a model with the test scores picked out of ``model_scores``.

    Parameters
    ----------
    model_name : label stored under ``'model_name'``.
    model : object stored as-is under ``'model'``.
    model_scores : mapping that must contain ``'test_accuracy'``,
        ``'test_recall'`` and ``'test_precision'``; other keys are ignored.

    Returns
    -------
    dict
        Keys ``model_name``, ``model``, ``test_accuracy``, ``test_recall``,
        ``test_precision`` (in that order).

    Raises
    ------
    KeyError
        If one of the three score keys is missing from ``model_scores``.
    """
    score_keys = ('test_accuracy', 'test_recall', 'test_precision')
    report = {'model_name': model_name, 'model': model}
    # Copy the scores across in a fixed order so the report layout is stable.
    report.update((key, model_scores[key]) for key in score_keys)
    return report

In [33]:
def evaluate(features=None, target=None, cv=2, random_state=42):
    """Cross-validate a fixed panel of scikit-learn classifiers.

    Every classifier is scored with ``cross_validate`` on accuracy, precision
    and recall; the per-fold test scores are packaged by ``model_report``.

    Parameters
    ----------
    features : optional
        Feature matrix. Defaults to the notebook-level ``X``.
    target : optional
        Label vector. Defaults to the notebook-level ``Y``.
    cv : default 2
        Passed unchanged to ``cross_validate`` as its ``cv`` argument.
    random_state : int or None, default 42
        Seed handed to the estimators that draw random numbers while fitting
        (MLP, decision tree, random forest, AdaBoost) so repeated runs print
        the same scores. Pass ``None`` to leave them unseeded.

    Returns
    -------
    list of dict
        One ``model_report`` dict per classifier, in the order defined below.
    """
    # Fall back to the notebook globals so a bare ``evaluate()`` keeps working.
    if features is None:
        features = X
    if target is None:
        target = Y

    # NOTE(review): the plain 'precision' / 'recall' scorers assume a binary
    # target; a multiclass target would need e.g. 'precision_macro' -- confirm
    # against the scikit-learn scoring docs before changing the dataset.
    scoring_metrics = ['accuracy', 'precision', 'recall']

    # Each display name sits next to its estimator so the two cannot drift
    # out of step the way two parallel lists can.
    candidates = [
        ('MLPClassifier', MLPClassifier(alpha=1, max_iter=1000, random_state=random_state)),
        ('KNeighborsClassifier', KNeighborsClassifier(3)),
        ('SVC-Linear', SVC(kernel="linear", C=0.025)),
        ('SVC', SVC(gamma=2, C=1)),
        ('GaussianProcessClassifier', GaussianProcessClassifier(1.0 * RBF(1.0))),
        ('DecisionTreeClassifier', DecisionTreeClassifier(max_depth=5, random_state=random_state)),
        ('RandomForestClassifier', RandomForestClassifier(max_depth=5, n_estimators=10,
                                                          max_features=1, random_state=random_state)),
        ('AdaBoostClassifier', AdaBoostClassifier(random_state=random_state)),
        ('GaussianNB', GaussianNB()),
        ('QuadraticDiscriminantAnalysis', QuadraticDiscriminantAnalysis()),
    ]

    all_models = []
    for model_name, model in candidates:
        # Only the test_* scores are consumed by model_report, so train scores
        # and per-fold fitted estimators are not requested.
        scoring = cross_validate(model, features, target, cv=cv, scoring=scoring_metrics)
        # `model` is the estimator instance that was handed to cross_validate.
        all_models.append(model_report(model_name, model, scoring))

    return all_models
models = evaluate()



In [35]:
def round_ave(number):
    """Return the average of ``number`` rounded to three decimal places.

    ``number`` is anything ``np.average`` accepts (e.g. a list or array of
    per-fold scores).
    """
    mean_value = np.average(number)
    return np.round(mean_value, 3)
# Print the model name followed by its fold-averaged accuracy, recall and
# precision, then a blank separator, for every evaluated model.
for model in models:
    print("Model Name: " + str(model['model_name']))
    for label, key in (("accuracy", 'test_accuracy'),
                       ("recall", 'test_recall'),
                       ("precision", 'test_precision')):
        print("Model " + label + ": " + str(round_ave(model[key])))
    print("\n")

Model Name: MLPClassifier
Model accuracy: 0.908
Model recall: 1.0
Model precision: 0.847


Model Name: KNeighborsClassifier
Model accuracy: 0.817
Model recall: 0.82
Model precision: 0.829


Model Name: SVC-Linear
Model accuracy: 0.842
Model recall: 0.82
Model precision: 0.857


Model Name: SVC
Model accuracy: 0.808
Model recall: 0.82
Model precision: 0.821


Model Name: GaussianProcessClassifier
Model accuracy: 0.825
Model recall: 0.84
Model precision: 0.829


Model Name: DecisionTreeClassifier
Model accuracy: 0.8
Model recall: 0.82
Model precision: 0.812


Model Name: RandomForestClassifier
Model accuracy: 0.925
Model recall: 0.82
Model precision: 1.0


Model Name: AdaBoostClassifier
Model accuracy: 0.808
Model recall: 0.84
Model precision: 0.812


Model Name: GaussianNB
Model accuracy: 0.917
Model recall: 0.82
Model precision: 0.981


Model Name: QuadraticDiscriminantAnalysis
Model accuracy: 0.867
Model recall: 1.0
Model precision: 0.805


