# 5.8.13 Reducción de dimensionalidad usando SelectFromModel

In [4]:
# Enable IPython's autoreload extension so edits to imported modules are
# picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectFromModel
from sklearn.svm import LinearSVC
from sklearn.ensemble import ExtraTreesClassifier

import warnings
# Suppress every warning for the rest of the session so cell outputs stay clean.
# NOTE(review): this is a blanket filter, so it hides all warning categories,
# not only cosmetic ones -- confirm that is intended.
warnings.filterwarnings("ignore")

## 5.8.13.1 Modelos lineales
Los modelos lineales penalizados con una norma L1 tienden a hacer muchos de los coeficientes de las características iguales a cero, por lo que pueden ser usados para la reducción de la dimensionalidad de los datos (selección de variables). Se recomiendan los siguientes tipos de modelos:
- Lasso()
- LogisticRegression()
- LinearSVC()

In [6]:
# Load the iris dataset as a (features, target) pair instead of a Bunch object.
X, y = load_iris(return_X_y=True)
# Display the dimensions of the feature matrix before any selection is applied.
X.shape

(150, 4)

In [7]:
# Build and train the base estimator. The L1 penalty pushes many feature
# coefficients to zero, which is what the selector below relies on.
# fit() returns the estimator itself, so construction and training are chained.
linearSVC = LinearSVC(
    penalty="l1",
    dual=False,
    C=0.01,
    max_iter=10000,
).fit(X, y)

# Feature selector wrapped around the trained estimator.
model = SelectFromModel(
    # Base estimator the transformer is built from. It may be already fitted
    # (when prefit=True) or not fitted.
    estimator=linearSVC,
    # Whether an already-fitted model is expected to be passed directly to
    # the constructor.
    prefit=True,
    # Cut-off used for selection: features whose importance is greater than
    # or equal to it are kept, the rest are discarded. Accepted values:
    #   * a float
    #   * "median"    -> median of the feature importances
    #   * "mean"      -> mean of the feature importances
    #   * "1.25*mean" -> a statistic multiplied by a scaling factor
    #   * None        -> 1e-5 when the penalty is L1, otherwise "mean"
    threshold=None,
    # Order of the norm used to filter the coefficient vectors below the
    # threshold when the estimator's coef_ attribute is 2-dimensional.
    norm_order=1,
    # Upper bound on the number of features to select (None = no bound).
    max_features=None,
)

# Keep only the selected columns and show the reduced shape.
X_new = model.transform(X)
X_new.shape

(150, 3)

## 5.8.13.2 Usando árboles

In [8]:
# Train an extra-trees ensemble and display the importance it assigns to each
# feature. The trees are built with randomness, so random_state is fixed to
# make the importances (and any selection derived from them) identical on
# every Restart & Run All; the exact values depend on the chosen seed.
treeClassifier = ExtraTreesClassifier(n_estimators=50, random_state=0)
treeClassifier = treeClassifier.fit(X, y)
treeClassifier.feature_importances_

array([0.09715395, 0.05691274, 0.42543948, 0.42049383])

In [9]:
# Wrap the already-trained tree ensemble in a selector. prefit=True tells the
# selector that the estimator has been fitted beforehand, so transform() is
# called directly. No threshold is given, so the selector's default applies.
model = SelectFromModel(treeClassifier, prefit=True)

# Reduce X to the selected columns and show the resulting shape.
X_new = model.transform(X)
X_new.shape

(150, 2)

In [10]:
# Prints a fixed marker string.
# NOTE(review): presumably an end-of-notebook signal that every cell ran -- confirm it is still needed.
print('ok_')

ok_
