# Ensemble Methods Algoritmos

Boosting Methods: AdaBoost (AB) and Gradient Boosting (GBM).

Bagging Methods: Random Forests (RF) and Extra Trees (ET).

In [1]:
import pandas as pd
import numpy
from pandas import read_csv
from matplotlib import pyplot

%matplotlib inline
pd.set_option('display.width', 100)
pd.set_option('precision', 3)

from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)

filename = "data/boston-housing/housing.csv"

names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO',
'B', 'LSTAT', 'MEDV']
dataset = read_csv(filename, names=names)

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split-out validation dataset
array = dataset.values
X = array[:,0:13]
Y = array[:,13]

validation_size = 0.20 #   20%
seed = 7
X_train, X_validation, Y_train, Y_validation = train_test_split(X, Y, test_size=validation_size, random_state=seed)

# Escalar Datos
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)

In [4]:
from sklearn.pipeline import Pipeline

from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import AdaBoostRegressor


# ensembles
ensembles = []
ensembles.append(('ScaledAB',   Pipeline([('Scaler', StandardScaler()),  ('AB', AdaBoostRegressor())])))
ensembles.append(('ScaledGBM',  Pipeline([('Scaler', StandardScaler()),  ('GBM', GradientBoostingRegressor())])))
ensembles.append(('ScaledRF',   Pipeline([('Scaler', StandardScaler()),  ('RF', RandomForestRegressor())])))
ensembles.append(('ScaledET',   Pipeline([('Scaler', StandardScaler()),  ('ET', ExtraTreesRegressor())])))



In [9]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

#parametros
num_folds = 10
seed = 7
scoring = 'neg_mean_squared_error'

results = []
names = []

for name, model in ensembles:
    kfold = KFold(n_splits=num_folds, random_state=seed)
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
    
    results.append(cv_results)
    names.append(name)
    
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

ScaledAB: -14.917635 (5.969747)
ScaledGBM: -10.223861 (4.523565)
ScaledRF: -13.733081 (8.308561)
ScaledET: -9.597902 (4.462977)


### Se selecciona el  Gradient Boosting (GBM).