# Forward selection

In [1]:
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector

In [2]:
df = pd.read_csv("wine.csv")

In [3]:
df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065,0
1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050,0
2,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185,0
3,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480,0
4,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735,0


In [4]:
df.shape

(178, 14)

In [5]:
df.isnull().sum()

alcohol                         0
malic_acid                      0
ash                             0
alcalinity_of_ash               0
magnesium                       0
total_phenols                   0
flavanoids                      0
nonflavanoid_phenols            0
proanthocyanins                 0
color_intensity                 0
hue                             0
od280/od315_of_diluted_wines    0
proline                         0
target                          0
dtype: int64

In [6]:
df[df.isnull().any(axis=1)]

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target


In [7]:
X = df.iloc[:, 0:13]
y = df.iloc[:, -1]

In [8]:
X.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [9]:
y

0      0
1      0
2      0
3      0
4      0
      ..
173    2
174    2
175    2
176    2
177    2
Name: target, Length: 178, dtype: int64

In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=100)

In [11]:
from sklearn.ensemble import RandomForestClassifier

In [12]:
X_train.shape

(124, 13)

In [13]:
forward_feature_selection = SequentialFeatureSelector(RandomForestClassifier(n_jobs=-1),
                                                     k_features= 6,
                                                     forward=True,
                                                     floating=False,
                                                     verbose=2,
                                                     scoring= "accuracy",
                                                     cv= 5).fit(X_train, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  13 out of  13 | elapsed:   12.1s finished

[2022-06-06 20:05:42] Features: 1/6 -- score: 0.7823333333333333[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:   14.7s finished

[2022-06-06 20:05:57] Features: 2/6 -- score: 0.9436666666666665[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:   12.8s finished

[2022-06-06 20:06:10] Features: 3/6 -- score: 0.968[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | 

In [14]:
forward_feature_selection.k_feature_idx_

(2, 3, 4, 6, 9, 12)

In [15]:
forward_feature_selection.k_feature_names_

('ash',
 'alcalinity_of_ash',
 'magnesium',
 'flavanoids',
 'color_intensity',
 'proline')

In [16]:
forward_feature_selection.k_score_

0.984

In [17]:
pd.DataFrame.from_dict(forward_feature_selection.get_metric_dict()).T

Unnamed: 0,feature_idx,cv_scores,avg_score,feature_names,ci_bound,std_dev,std_err
1,"(6,)","[0.76, 0.76, 0.8, 0.8, 0.7916666666666666]",0.782333,"(flavanoids,)",0.0237615,0.0184872,0.00924362
2,"(6, 9)","[0.96, 1.0, 1.0, 0.8, 0.9583333333333334]",0.943667,"(flavanoids, color_intensity)",0.0952658,0.07412,0.03706
3,"(4, 6, 9)","[0.96, 1.0, 1.0, 0.88, 1.0]",0.968,"(magnesium, flavanoids, color_intensity)",0.0599558,0.0466476,0.0233238
4,"(4, 6, 9, 12)","[0.96, 1.0, 0.96, 0.96, 1.0]",0.976,"(magnesium, flavanoids, color_intensity, proline)",0.0251865,0.0195959,0.00979796
5,"(2, 4, 6, 9, 12)","[0.96, 1.0, 0.96, 1.0, 1.0]",0.984,"(ash, magnesium, flavanoids, color_intensity, ...",0.0251865,0.0195959,0.00979796
6,"(2, 3, 4, 6, 9, 12)","[0.96, 1.0, 0.96, 1.0, 1.0]",0.984,"(ash, alcalinity_of_ash, magnesium, flavanoids...",0.0251865,0.0195959,0.00979796


In [18]:
forward_feature_selection = SequentialFeatureSelector(RandomForestClassifier(n_jobs=-1),
                                                     k_features= (1,13),
                                                     forward=True,
                                                     floating=False,
                                                     verbose=2,
                                                     scoring= "accuracy",
                                                     cv= 5).fit(X_train, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  13 out of  13 | elapsed:   16.7s finished

[2022-06-06 20:07:03] Features: 1/13 -- score: 0.7823333333333333[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:   15.4s finished

[2022-06-06 20:07:19] Features: 2/13 -- score: 0.9269999999999999[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:   15.0s finished

[2022-06-06 20:07:34] Features: 3/13 -- score: 0.9676666666666666[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  

In [19]:
forward_feature_selection.k_feature_names_

('alcohol', 'ash', 'magnesium', 'flavanoids', 'color_intensity', 'proline')

In [20]:
forward_feature_selection.k_score_

0.992

In [21]:
pd.DataFrame.from_dict(forward_feature_selection.get_metric_dict()).T

Unnamed: 0,feature_idx,cv_scores,avg_score,feature_names,ci_bound,std_dev,std_err
1,"(6,)","[0.76, 0.76, 0.8, 0.8, 0.7916666666666666]",0.782333,"(flavanoids,)",0.0237615,0.0184872,0.00924362
2,"(6, 9)","[0.92, 0.96, 1.0, 0.88, 0.875]",0.927,"(flavanoids, color_intensity)",0.0613179,0.0477074,0.0238537
3,"(4, 6, 9)","[1.0, 1.0, 1.0, 0.88, 0.9583333333333334]",0.967667,"(magnesium, flavanoids, color_intensity)",0.0600353,0.0467095,0.0233548
4,"(4, 6, 9, 12)","[0.96, 1.0, 0.96, 1.0, 1.0]",0.984,"(magnesium, flavanoids, color_intensity, proline)",0.0251865,0.0195959,0.00979796
5,"(2, 4, 6, 9, 12)","[0.96, 1.0, 0.96, 1.0, 1.0]",0.984,"(ash, magnesium, flavanoids, color_intensity, ...",0.0251865,0.0195959,0.00979796
6,"(0, 2, 4, 6, 9, 12)","[0.96, 1.0, 1.0, 1.0, 1.0]",0.992,"(alcohol, ash, magnesium, flavanoids, color_in...",0.0205647,0.016,0.008
7,"(0, 2, 4, 6, 7, 9, 12)","[0.96, 1.0, 1.0, 1.0, 1.0]",0.992,"(alcohol, ash, magnesium, flavanoids, nonflava...",0.0205647,0.016,0.008
8,"(0, 2, 3, 4, 6, 7, 9, 12)","[0.96, 1.0, 1.0, 1.0, 1.0]",0.992,"(alcohol, ash, alcalinity_of_ash, magnesium, f...",0.0205647,0.016,0.008
9,"(0, 2, 3, 4, 6, 7, 8, 9, 12)","[0.96, 1.0, 1.0, 1.0, 1.0]",0.992,"(alcohol, ash, alcalinity_of_ash, magnesium, f...",0.0205647,0.016,0.008
10,"(0, 2, 3, 4, 5, 6, 7, 8, 9, 12)","[0.96, 0.96, 1.0, 1.0, 1.0]",0.984,"(alcohol, ash, alcalinity_of_ash, magnesium, t...",0.0251865,0.0195959,0.00979796
