A atividade de hoje será a construção de um ensemble heterogêneo usando a metodologia Bagging:
- Utilizar no ensemble os classificadores: Árvore de Decisão, SVM e Redes Neurais Artificiais - ok
- Encontrar a quantidade de componentes do ensemble e a configuração de cada um deles, lembrando que devem ser classificadores simples - ok
- Utilizar a metodologia Bagging para a reamostragem do conjunto de treinamento - ok
- Utilizar validação cruzada para avaliar a qualidade do ensemble durante a sua construção, usando as acurácias de treino e de validação
- Apresentar, ao final, um histórico das execuções/buscas por configurações do ensemble e a melhor configuração encontrada. Apresentar os desempenhos de treinamento, validação e teste da melhor configuração do ensemble.

In [28]:
# Get some classifiers to evaluate with BaggingClassifier
import numpy as np
import pandas
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn import tree
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
from matplotlib import pyplot as plt
# Suppress scikit-learn ConvergenceWarning messages for the rest of the session.
simplefilter(action="ignore", category=ConvergenceWarning)
# Figure size applied to matplotlib figures created from here on.
plt.rcParams['figure.figsize'] = [16, 10]

# One seed for NumPy's global RNG; the same value is also passed as
# `random_state` to the bagging ensembles built in the cells below.
seed = 1075
np.random.seed(seed)

# Column labels are supplied explicitly via `names=`: eight feature columns
# followed by the target column ('class').
names = [
    'preg', 'plas', 'pres', 'skin',
    'test', 'mass', 'pedi', 'age',
    'class',
]
dataframe = pandas.read_csv("pima-indians-diabetes.csv", names=names)

# Work on the raw value matrix: columns 0..7 are the inputs, column 8 the label.
array = dataframe.values
X, Y = array[:, :8], array[:, 8]

In [34]:
# Base learners: a linear-kernel SVM, an MLP with one hidden layer of 100
# units (early stopping enabled), and a decision tree capped at depth 10.
svc = SVC(kernel='linear')
neural_network = MLPClassifier(
    hidden_layer_sizes=(100,),
    random_state=10,
    early_stopping=True,
    n_iter_no_change=5,
)
DecisionTree = tree.DecisionTreeClassifier(random_state=42, max_depth=10)

# For each base learner, run 10-fold cross-validation on the plain model and
# on a bagged ensemble of it, then print mean and standard deviation of both.
clf_array = [svc, neural_network, DecisionTree]
bagging_params = {"max_samples": 0.3, "random_state": seed}
for clf in clf_array:
    pima_scores = cross_val_score(clf, X, Y, cv=10)
    bagging_clf = BaggingClassifier(clf, **bagging_params)
    bagging_scores = cross_val_score(bagging_clf, X, Y, cv=10)
    print(
        'Media clf ', pima_scores.mean(),
        '\nMedia Bagging', bagging_scores.mean(),
        '\nDesvio ', pima_scores.std(),
        '\nDesvio bagging ', bagging_scores.std(),
    )



Media clf  0.7669685577580314 
Media Bagging 0.7669685577580315 
Desvio  0.03338716735131549 
Desvio bagging  0.034460545140375436
Media clf  0.6536568694463432 
Media Bagging 0.6484449760765549 
Desvio  0.007970053778345003 
Desvio bagging  0.027089802471656085
Media clf  0.7187457279562542 
Media Bagging 0.7526315789473684 
Desvio  0.07277566442414571 
Desvio bagging  0.04215367032124066


In [19]:

# Base learners: a linear-kernel SVM, an MLP with one hidden layer of 100
# units (early stopping enabled), and a decision tree capped at depth 10.
svc = SVC(kernel='linear')
neural_network = MLPClassifier(
    hidden_layer_sizes=(100,),
    random_state=10,
    early_stopping=True,
    n_iter_no_change=5,
)
DecisionTree = tree.DecisionTreeClassifier(random_state=42, max_depth=10)

# Same comparison as a plain bagging run, but with `bootstrap=True` spelled
# out explicitly (scikit-learn's option for sampling with replacement).
clf_array = [svc, neural_network, DecisionTree]
bagging_params = {
    "max_samples": 0.3,
    "random_state": seed,
    "bootstrap": True,
}
for clf in clf_array:
    # 10-fold CV scores of the stand-alone classifier.
    pima_scores = cross_val_score(clf, X, Y, cv=10)
    # 10-fold CV scores of the bagged version of the same classifier.
    bagging_clf = BaggingClassifier(clf, **bagging_params)
    bagging_scores = cross_val_score(bagging_clf, X, Y, cv=10)
    print(
        'Media clf ', pima_scores.mean(),
        '\nMedia Bagging', bagging_scores.mean(),
        '\nDesvio ', pima_scores.std(),
        '\nDesvio bagging ', bagging_scores.std(),
    )


Media clf  0.7669685577580314 
Media Bagging 0.7669685577580315 
Desvio  0.03338716735131549 
Desvio bagging  0.034460545140375436
Media clf  0.6536568694463432 
Media Bagging 0.6484449760765549 
Desvio  0.007970053778345003 
Desvio bagging  0.027089802471656085
Media clf  0.7187457279562542 
Media Bagging 0.7526315789473684 
Desvio  0.07277566442414571 
Desvio bagging  0.04215367032124066


In [20]:
# Base learners: a linear-kernel SVM, an MLP with one hidden layer of 100
# units (early stopping enabled), and a decision tree capped at depth 10.
svc = SVC(kernel='linear')
neural_network = MLPClassifier(
    hidden_layer_sizes=(100,),
    random_state=10,
    early_stopping=True,
    n_iter_no_change=5,
)
DecisionTree = tree.DecisionTreeClassifier(random_state=42, max_depth=10)

# Search step: bagged ensembles with 20 estimators each.
# NOTE(review): cross_val_score is documented to fit a fresh clone of the
# estimator per fold, so `warm_start=True` probably has no effect here --
# confirm before relying on it.
clf_array = [svc, neural_network, DecisionTree]
bagging_params = {
    "max_samples": 0.3,
    "random_state": seed,
    "bootstrap": True,
    "n_estimators": 20,
    "warm_start": True,
}
for clf in clf_array:
    # 10-fold CV scores of the stand-alone classifier.
    pima_scores = cross_val_score(clf, X, Y, cv=10)
    # 10-fold CV scores of the bagged version of the same classifier.
    bagging_clf = BaggingClassifier(clf, **bagging_params)
    bagging_scores = cross_val_score(bagging_clf, X, Y, cv=10)
    print(
        'Media clf ', pima_scores.mean(),
        '\nMedia Bagging', bagging_scores.mean(),
        '\nDesvio ', pima_scores.std(),
        '\nDesvio bagging ', bagging_scores.std(),
    )

Media clf  0.7669685577580314 
Media Bagging 0.7708817498291183 
Desvio  0.03338716735131549 
Desvio bagging  0.03570846381364376
Media clf  0.6536568694463432 
Media Bagging 0.6562200956937799 
Desvio  0.007970053778345003 
Desvio bagging  0.04488597473110575
Media clf  0.7187457279562542 
Media Bagging 0.7565447710184552 
Desvio  0.07277566442414571 
Desvio bagging  0.05622202002827217


In [22]:
# Base learners: a linear-kernel SVM, an MLP with one hidden layer of 100
# units (early stopping enabled), and a decision tree capped at depth 10.
svc = SVC(kernel='linear')
neural_network = MLPClassifier(
    hidden_layer_sizes=(100,),
    random_state=10,
    early_stopping=True,
    n_iter_no_change=5,
)
DecisionTree = tree.DecisionTreeClassifier(random_state=42, max_depth=10)

# Search step: bagged ensembles with 50 estimators each.
# NOTE(review): cross_val_score is documented to fit a fresh clone of the
# estimator per fold, so `warm_start=True` probably has no effect here --
# confirm before relying on it.
clf_array = [svc, neural_network, DecisionTree]
bagging_params = {
    "max_samples": 0.3,
    "random_state": seed,
    "bootstrap": True,
    "n_estimators": 50,
    "warm_start": True,
}
for clf in clf_array:
    # 10-fold CV scores of the stand-alone classifier.
    pima_scores = cross_val_score(clf, X, Y, cv=10)
    # 10-fold CV scores of the bagged version of the same classifier.
    bagging_clf = BaggingClassifier(clf, **bagging_params)
    bagging_scores = cross_val_score(bagging_clf, X, Y, cv=10)
    print(
        'Media clf ', pima_scores.mean(),
        '\nMedia Bagging', bagging_scores.mean(),
        '\nDesvio ', pima_scores.std(),
        '\nDesvio bagging ', bagging_scores.std(),
    )

Media clf  0.7669685577580314 
Media Bagging 0.7747778537252221 
Desvio  0.03338716735131549 
Desvio bagging  0.032220613765320885
Media clf  0.6536568694463432 
Media Bagging 0.6536568694463432 
Desvio  0.007970053778345003 
Desvio bagging  0.007970053778345003
Media clf  0.7187457279562542 
Media Bagging 0.7682843472317157 
Desvio  0.07277566442414571 
Desvio bagging  0.058801599472932536


In [24]:
# Base learners: a linear-kernel SVM, an MLP with one hidden layer of 100
# units (early stopping enabled), and a decision tree capped at depth 10.
svc = SVC(kernel='linear')
neural_network = MLPClassifier(
    hidden_layer_sizes=(100,),
    random_state=10,
    early_stopping=True,
    n_iter_no_change=5,
)
DecisionTree = tree.DecisionTreeClassifier(random_state=42, max_depth=10)

# Search step: bagged ensembles with 100 estimators each; `n_jobs=-1` asks
# scikit-learn to parallelise the ensemble over all available processors.
# NOTE(review): cross_val_score is documented to fit a fresh clone of the
# estimator per fold, so `warm_start=True` probably has no effect here --
# confirm before relying on it.
clf_array = [svc, neural_network, DecisionTree]
bagging_params = {
    "max_samples": 0.3,
    "random_state": seed,
    "bootstrap": True,
    "n_estimators": 100,
    "warm_start": True,
    "n_jobs": -1,
}
for clf in clf_array:
    # 10-fold CV scores of the stand-alone classifier.
    pima_scores = cross_val_score(clf, X, Y, cv=10)
    # 10-fold CV scores of the bagged version of the same classifier.
    bagging_clf = BaggingClassifier(clf, **bagging_params)
    bagging_scores = cross_val_score(bagging_clf, X, Y, cv=10)
    print(
        'Media clf ', pima_scores.mean(),
        '\nMedia Bagging', bagging_scores.mean(),
        '\nDesvio ', pima_scores.std(),
        '\nDesvio bagging ', bagging_scores.std(),
    )

Media clf  0.7669685577580314 
Media Bagging 0.7721804511278195 
Desvio  0.03338716735131549 
Desvio bagging  0.030673707245564594
Media clf  0.6536568694463432 
Media Bagging 0.6510594668489406 
Desvio  0.007970053778345003 
Desvio bagging  0.003417634996582386
Media clf  0.7187457279562542 
Media Bagging 0.774794941900205 
Desvio  0.07277566442414571 
Desvio bagging  0.056212483409292656
