In [97]:
import numpy as np

### Filtrado de variables

In [98]:
from sklearn.datasets import load_wine

X, y = load_wine(return_X_y=True)

In [99]:
# verificamos nuestros datos
X.shape

(178, 13)

#### Univariate Selection

In [100]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif

In [101]:
# Elegimos 4 atributos representativos
univ_sel = SelectKBest(score_func=f_classif, k=6)
fit = univ_sel.fit(X, y)

# Imprimimos los atributos ordenados por importancia
print(np.argsort(fit.scores_)[::-1])

# Imprimimos los 6 atributos seleccionados
print(np.argsort(fit.scores_)[::-1][:6])

[ 6 12 11  0  9 10  5  1  3  8  7  2  4]
[ 6 12 11  0  9 10]


#### Recursive Feature Elimination

In [102]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

In [103]:
# elegimos 4 atributos representativos y un modelo de regresion lineal
model = LogisticRegression(solver='lbfgs', max_iter=10000)
rfe = RFE(model, n_features_to_select=6)
fit = rfe.fit(X, y)

print(np.where(fit.support_)[0])

[ 0  2  5  6  9 11]


#### Feature Importance

In [104]:
from sklearn.ensemble import ExtraTreesClassifier

In [105]:
model = ExtraTreesClassifier(n_estimators=10)
model.fit(X, y)


# Imprimimos los atributos ordenados por importancia
print(np.argsort(model.feature_importances_)[::-1])

# Imprimimos los 6 atributos seleccionados
print(np.argsort(model.feature_importances_)[::-1][:6])

[ 6  9  0 11 12 10  5  4  2  8  7  3  1]
[ 6  9  0 11 12 10]


### Ajuste de parametros

In [106]:
from sklearn.datasets import load_boston

X, y = load_boston(return_X_y=True)

In [107]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn import svm

In [108]:
param_grid = {
    "learning_rate": [0.1, 0.05, 0.02, 0.01],
    "max_depth": [4, 6],
    "min_samples_leaf": [3, 5, 9, 17],
    "max_features": [1.0, 0.3, 0.1]
}

est = GradientBoostingRegressor(n_estimators=3000)
gs_cv = GridSearchCV(est, param_grid).fit(X, y)

# Imprimimos el mejor set de hiperparametros
print(gs_cv.best_params_)

{'learning_rate': 0.05, 'max_depth': 4, 'max_features': 0.3, 'min_samples_leaf': 3}
