# Métodos Ensemble

## Bagging

In [1]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_digits
from sklearn.preprocessing import scale

In [2]:
# Load the digits dataset via sklearn.datasets.load_digits
digits = load_digits()

In [3]:
# Pre-processing: rescale the feature matrix with sklearn.preprocessing.scale
data = scale(digits.data)

In [4]:
# Predictor matrix (scaled features) and target vector (digit labels)
x, y = data, digits.target

In [5]:
# Build the classifier: a bagging ensemble whose base estimator is k-NN,
# with max_samples and max_features both set to 0.5.
# NOTE(review): no random_state is passed here, unlike the tree-based models
# later in this notebook — scores presumably differ between runs; confirm
# whether that is intended.
bagging = BaggingClassifier(
    KNeighborsClassifier(),
    max_samples=0.5,
    max_features=0.5,
)

In [6]:
# Display the estimator's repr to check the configured parameters
bagging

BaggingClassifier(base_estimator=KNeighborsClassifier(), max_features=0.5,
                  max_samples=0.5)

In [7]:
# cross_val_score is called in the next cell with its default settings
# (only the estimator, x and y are passed); the recorded result contains
# five fold scores.

In [8]:
# Cross-validated scores of the bagging ensemble (default CV settings)
scores = cross_val_score(estimator=bagging, X=x, y=y)

In [9]:
# Mean of the cross-validation scores
mean = scores.mean()

In [10]:
# Per-fold scores of the bagging ensemble
print(scores)

[0.91388889 0.925      0.94986072 0.95264624 0.93036212]


In [11]:
# Average score across the folds
print(mean)

0.9343515939337668


## Extremely Randomized Trees (ExtraTrees)

In [12]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_digits
from sklearn.preprocessing import scale

In [13]:
# Load the digits dataset (same call as in the Bagging section)
digits = load_digits()

In [14]:
# Pre-processing: rescale the feature matrix with sklearn.preprocessing.scale
data = scale(digits.data)

In [15]:
# Predictor matrix (scaled features) and target vector (digit labels)
x, y = data, digits.target

In [16]:
# Baseline classifier: a single decision tree (no depth limit, fixed seed),
# scored with cross-validation; prints the fold scores, then their mean.
clf = DecisionTreeClassifier(
    max_depth=None,
    min_samples_split=2,
    random_state=0,
)
scores = cross_val_score(estimator=clf, X=x, y=y)
mean = scores.mean()
print(scores, mean, sep="\n")

[0.78055556 0.71388889 0.80779944 0.8356546  0.79665738]
0.7869111730114515


In [17]:
# Random forest of 10 trees with the same tree settings and seed as the
# single-tree baseline; prints the fold scores, then their mean.
clf = RandomForestClassifier(
    n_estimators=10,
    max_depth=None,
    min_samples_split=2,
    random_state=0,
)
scores = cross_val_score(estimator=clf, X=x, y=y)
mean = scores.mean()
print(scores, mean, sep="\n")

[0.89166667 0.88055556 0.91643454 0.93036212 0.90807799]
0.9054193748065614


In [18]:
# Extra-trees ensemble of 10 trees, configured like the random forest above
# for a like-for-like comparison; prints the fold scores, then their mean.
clf = ExtraTreesClassifier(
    n_estimators=10,
    max_depth=None,
    min_samples_split=2,
    random_state=0,
)
scores = cross_val_score(estimator=clf, X=x, y=y)
mean = scores.mean()
print(scores, mean, sep="\n")

[0.90277778 0.86944444 0.93593315 0.95264624 0.91364903]
0.9148901268956979


## Adaboost

In [19]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import fetch_openml
from sklearn.model_selection import cross_val_score

In [20]:
# Load the data: fetch the dataset named 'heart' from OpenML
# NOTE(review): no dataset version is pinned in this call — confirm which
# OpenML version of 'heart' is expected.
heart = fetch_openml('heart')
x = heart.data
# Work on a copy of the target so the relabelling below does not modify
# heart.target
y = np.copy(heart.target)
# Relabel class -1 as 0
# NOTE(review): assumes the target is numeric with a -1 label — TODO confirm
y[y == -1] = 0

In [21]:
# Training / test split by position: the first 189 entries are used for
# training and everything after them for testing (no shuffling).
x_train, x_test = x[:189], x[189:]
y_train, y_test = y[:189], y[189:]

In [22]:
# Base estimator: a depth-1 decision tree, fitted on the training slice
estim_base = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1).fit(x_train, y_train)
# Error of the base estimator alone: 1 minus its score on the test slice
estim_base_err = 1.0 - estim_base.score(x_test, y_test)

In [23]:
# AdaBoost ensemble (SAMME algorithm, 400 estimators, learning rate 1.0)
# built on the depth-1 tree `estim_base`.
# The base estimator is passed positionally, like the BaggingClassifier call
# earlier in this notebook: its keyword was renamed from `base_estimator` to
# `estimator` in newer scikit-learn releases, and the positional form works
# with both.
ada_clf = AdaBoostClassifier(estim_base,
                            learning_rate = 1.0,
                            n_estimators = 400,
                            algorithm = "SAMME")

In [24]:
# Fit the AdaBoost ensemble on the training slice
ada_clf.fit(x_train, y_train)

AdaBoostClassifier(algorithm='SAMME',
                   base_estimator=DecisionTreeClassifier(max_depth=1),
                   n_estimators=400)

In [25]:
# Cross-validated scores of the AdaBoost ensemble; prints the fold scores,
# then their mean.
# NOTE(review): the cross-validation runs on the held-out slice
# (x_test, y_test), not on the training slice. As far as I know,
# cross_val_score refits clones of the estimator per fold, so the fit from
# the previous cell would not be what is scored here — confirm intent.
scores = cross_val_score(estimator=ada_clf, X=x_test, y=y_test)
means = scores.mean()
print(scores, means, sep="\n")

[0.82352941 0.875      0.8125     0.8125     0.75      ]
0.8147058823529412
