### Wrapper Methods
* 1. `Exhustive search`
* 2. `Forward feature selection`
    * 2.1 using `sklearn`
    * 2.2 using `mlxtend`
    
* 3. `Backward feature selection`
    * 2.1 using `sklearn`
    * 2.2 using `mlxtend`

### 1. Exhustive Search

In [67]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Sklearn
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# mlextend
from mlxtend.feature_selection import ExhaustiveFeatureSelector

In [2]:
brest_cancer = load_breast_cancer()
X = pd.DataFrame(brest_cancer.data, columns = brest_cancer.feature_names)
y = brest_cancer.target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [3]:
efs = ExhaustiveFeatureSelector(
    estimator=RandomForestClassifier(n_estimators=3, random_state=0),
    min_features=1,
    max_features=4,
    scoring = "roc_auc",
    cv =2)

efs.fit(X_train, y_train)

Features: 31930/31930

In [4]:
efs.best_feature_names_

('compactness error', 'worst area', 'worst concavity', 'worst symmetry')

In [5]:
efs.subsets_[0]

{'feature_idx': (0,),
 'cv_scores': array([0.83105044, 0.86616541]),
 'avg_score': 0.8486079287582265,
 'feature_names': ('mean radius',)}

In [6]:
X_train_t = efs.transform(X_train)
X_test_t = efs.transform(X_test)

In [8]:
X_test_t[0:10]

array([[2.265e-02, 8.444e+02, 5.106e-01, 3.585e-01],
       [8.082e-03, 6.329e+02, 1.390e-01, 2.444e-01],
       [9.238e-03, 6.889e+02, 6.260e-02, 2.136e-01],
       [1.377e-02, 8.197e+02, 1.565e-01, 2.636e-01],
       [4.899e-03, 5.459e+02, 4.833e-02, 1.987e-01],
       [1.641e-02, 4.786e+02, 1.624e-01, 3.060e-01],
       [4.560e-02, 4.884e+02, 2.912e-01, 2.191e-01],
       [9.110e-03, 4.831e+02, 7.915e-02, 3.487e-01],
       [9.692e-03, 2.480e+02, 0.000e+00, 3.058e-01],
       [4.671e-02, 3.571e+02, 7.162e-02, 2.434e-01]])

### 2. Forward Feature Selection

##### 2.1 Using Sklearn

In [28]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Sklearn
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SequentialFeatureSelector as SFS

In [29]:
brest_cancer = load_breast_cancer()
X = pd.DataFrame(brest_cancer.data, columns = brest_cancer.feature_names)
y = brest_cancer.target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [30]:
sfs = SFS(
    estimator=RandomForestClassifier(n_estimators=5, random_state=0),
    n_features_to_select="auto",
    tol=0.001,
    direction="forward",
    scoring="roc_auc",
    cv = 3
    )

sfs.fit(X_train , y_train)
sfs.get_feature_names_out()

array(['mean smoothness', 'mean concavity', 'worst texture',
       'worst perimeter', 'worst concavity'], dtype=object)

In [31]:
X_train_t = sfs.transform(X_train)
X_test_t = sfs.transform(X_test)

##### 2.2 Using mlextend

In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Sklearn
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

In [33]:
brest_cancer = load_breast_cancer()
X = pd.DataFrame(brest_cancer.data, columns = brest_cancer.feature_names)
y = brest_cancer.target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [34]:
sfs = SFS(
    estimator=RandomForestClassifier(n_estimators=5, random_state=0),
    k_features=10,
    forward=True,
    scoring="roc_auc",
    verbose=1,
    cv = 3
    )

sfs.fit(X_train , y_train)
sfs.k_feature_names_

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:    0.9s finished
Features: 1/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  29 out of  29 | elapsed:    0.7s finished
Features: 2/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  28 out of  28 | elapsed:    0.8s finished
Features: 3/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:    0.9s finished
Features: 4/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  26 out of  26 | elapsed:    0.8s finished
Features: 5/10[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  25 out of  25 | elapsed:    0.8s finished
Features: 6/10[Parallel(

('mean perimeter',
 'mean compactness',
 'mean fractal dimension',
 'compactness error',
 'concave points error',
 'worst texture',
 'worst perimeter',
 'worst smoothness',
 'worst concave points',
 'worst symmetry')

In [35]:
X_train_t = sfs.transform(X_train)
X_test_t = sfs.transform(X_test)

In [36]:
X_test_t[0:10]

array([[8.864e+01, 1.469e-01, 7.325e-02, 2.265e-02, 1.334e-02, 2.966e+01,
        1.133e+02, 1.574e-01, 2.051e-01, 3.585e-01],
       [8.410e+01, 5.205e-02, 5.584e-02, 8.082e-03, 6.451e-03, 3.423e+01,
        9.129e+01, 1.289e-01, 6.005e-02, 2.444e-01],
       [8.959e+01, 5.581e-02, 5.586e-02, 9.238e-03, 1.076e-02, 1.931e+01,
        9.653e+01, 1.034e-01, 8.216e-02, 2.136e-01],
       [9.122e+01, 5.220e-02, 5.586e-02, 1.377e-02, 5.243e-03, 2.526e+01,
        1.058e+02, 9.445e-02, 7.530e-02, 2.636e-01],
       [8.261e+01, 3.766e-02, 5.863e-02, 4.899e-03, 1.164e-02, 2.281e+01,
        8.446e+01, 9.701e-02, 5.013e-02, 1.987e-01],
       [7.276e+01, 8.499e-02, 6.211e-02, 1.641e-02, 1.107e-02, 2.303e+01,
        7.915e+01, 1.483e-01, 8.542e-02, 3.060e-01],
       [7.875e+01, 1.073e-01, 6.659e-02, 4.560e-02, 1.667e-02, 2.871e+01,
        8.736e+01, 8.799e-02, 1.092e-01, 2.191e-01],
       [7.433e+01, 5.253e-02, 6.128e-02, 9.110e-03, 7.638e-03, 2.655e+01,
        8.092e+01, 1.223e-01, 5.741e-

In [37]:
sfs.k_features

10

In [40]:
# Transform into data frame
pd.DataFrame(X_train_t, columns = sfs.k_feature_names_)

Unnamed: 0,mean perimeter,mean compactness,mean fractal dimension,compactness error,concave points error,worst texture,worst perimeter,worst smoothness,worst concave points,worst symmetry
0,75.54,0.05642,0.05715,0.015000,0.008578,25.75,84.35,0.13690,0.09140,0.3101
1,71.94,0.06779,0.06028,0.011040,0.004967,25.78,76.91,0.14240,0.02022,0.3292
2,131.20,0.10340,0.05533,0.024230,0.016780,38.25,155.00,0.11660,0.16280,0.2572
3,86.24,0.04052,0.05520,0.005274,0.005044,26.10,98.91,0.10500,0.05185,0.2335
4,107.10,0.07112,0.05325,0.014460,0.005297,26.56,127.30,0.10090,0.08737,0.4677
...,...,...,...,...,...,...,...,...,...,...
421,120.90,0.05884,0.04996,0.011140,0.014630,24.30,129.00,0.12430,0.12940,0.2567
422,83.97,0.23960,0.08243,0.072170,0.014320,40.68,97.65,0.18530,0.22100,0.4366
423,59.82,0.05956,0.06959,0.008982,0.006565,25.02,75.79,0.13330,0.05052,0.2454
424,60.73,0.02344,0.06447,0.006736,0.000000,20.83,62.25,0.07117,0.00000,0.1909


### 3 Backward feature elimination

In [54]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Sklearn
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SequentialFeatureSelector as SFS

##### 3.1 Using sklearn

In [55]:
X, y = fetch_california_housing(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [56]:
sfs = SFS(
    estimator=RandomForestRegressor(n_estimators=5, random_state=10),
    n_features_to_select="auto",
    tol=0.001,
    direction="backward",
    scoring="r2",
    cv = 3
    )

sfs.fit(X_train , y_train)
sfs.get_feature_names_out()

array(['MedInc', 'HouseAge', 'AveBedrms', 'AveOccup', 'Latitude',
       'Longitude'], dtype=object)

In [57]:
X_train_t = sfs.transform(X_train)
X_test_t = sfs.transform(X_test)

##### 3.2 Using mlxtend

In [58]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Sklearn
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

In [59]:
X, y = fetch_california_housing(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [60]:
sfs = SFS(
    estimator=RandomForestRegressor(n_estimators=5, random_state=0),
    k_features=5,
    forward=False,
    scoring="r2",
    verbose=1,
    cv = 3
    )

sfs.fit(X_train , y_train)
sfs.k_feature_names_

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    6.9s finished
Features: 7/5[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    5.5s finished
Features: 6/5[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    4.5s finished
Features: 5/5

('HouseAge', 'AveRooms', 'AveBedrms', 'Latitude', 'Longitude')

In [64]:
X_train_t = sfs.transform(X_train)
X_test_t = sfs.transform(X_test)

X_train_t[0:10]

array([[  19.        ,    7.9616    ,    1.1376    ,   38.46      ,
        -122.68      ],
       [  18.        ,    6.5473251 ,    1.10288066,   32.95      ,
        -117.24      ],
       [  19.        ,    5.20754717,    1.21698113,   34.68      ,
        -118.27      ],
       [  13.        ,    6.42857143,    1.        ,   33.51      ,
        -117.18      ],
       [  21.        ,    4.42934783,    1.03668478,   38.62      ,
        -121.41      ],
       [  24.        ,    5.69372694,    0.94833948,   34.04      ,
        -118.09      ],
       [  26.        ,    5.77915194,    1.05123675,   34.09      ,
        -117.62      ],
       [  12.        ,    5.78106904,    1.06859688,   34.4       ,
        -118.46      ],
       [  14.        ,    5.32374101,    1.07913669,   33.73      ,
        -117.02      ],
       [  30.        ,    3.37669377,    1.01084011,   34.26      ,
        -118.43      ]])

In [66]:
# Transform into data frame
pd.DataFrame(X_train_t, columns = sfs.k_feature_names_).head(4)

Unnamed: 0,HouseAge,AveRooms,AveBedrms,Latitude,Longitude
0,19.0,7.9616,1.1376,38.46,-122.68
1,18.0,6.547325,1.102881,32.95,-117.24
2,19.0,5.207547,1.216981,34.68,-118.27
3,13.0,6.428571,1.0,33.51,-117.18
