In [5]:
from joblib import load

# Load models from disk
knn_classifier = load('./models/knn_classifier.joblib')
linear_svm = load('./models/linear_svm.joblib')
poly_svm = load('./models/poly_svm.joblib')
rbf_svm = load('./models/rbf_svm.joblib')

In [6]:
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# load dataset
feature_names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']

df = pd.read_csv('./data/pima-indians-diabetes.csv',
                 names=feature_names)

new_df = df[['plas', 'mass', 'age', 'class']]
X = new_df.iloc[:, :-1]
y = new_df.iloc[:, -1]

scaler = MinMaxScaler()

# Fit the scaler on the data and transform the features
X_normalized = scaler.fit_transform(X)

# prepare models
models = [('LR', LogisticRegression()), ('LDA', LinearDiscriminantAnalysis()), ('KNN', KNeighborsClassifier()),
          ('CART', DecisionTreeClassifier()), ('NB', GaussianNB()), ('SVM', SVC())]

# evaluate each model in turn
results = []
names = []
scoring = 'accuracy'
for name, model in models:
    kfold = model_selection.KFold(n_splits=10)
    cv_results = model_selection.cross_val_score(model, X, y, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

LR: 0.769583 (0.049490)
LDA: 0.766986 (0.052847)
KNN: 0.731699 (0.062676)
CART: 0.673018 (0.062154)
NB: 0.766969 (0.056920)
SVM: 0.759142 (0.052559)


In [7]:
from sklearn.ensemble import BaggingClassifier, StackingClassifier, AdaBoostClassifier
from sklearn import model_selection

# Bagging
bagging = BaggingClassifier(base_estimator=DecisionTreeClassifier(), n_estimators=10, random_state=0)
cv_results = model_selection.cross_val_score(bagging, X, y, cv=10, scoring='accuracy')
print(f"Bagging: {cv_results.mean()} ({cv_results.std()})")

# Stacking
stacking = StackingClassifier(estimators=models, final_estimator=LogisticRegression())
cv_results = model_selection.cross_val_score(stacking, X, y, cv=10, scoring='accuracy')
print(f"Stacking: {cv_results.mean()} ({cv_results.std()})")

# Boosting
boosting = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(), n_estimators=10, random_state=0)
cv_results = model_selection.cross_val_score(boosting, X, y, cv=10, scoring='accuracy')
print(f"Boosting: {cv_results.mean()} ({cv_results.std()})")



Bagging: 0.7291353383458647 (0.06805979835917404)
Stacking: 0.7695488721804511 (0.03758739767056624)
Boosting: 0.6926179084073821 (0.05806871630677114)


