In [1]:
# Import needed packages
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.feature_selection import RFE, RFECV, SequentialFeatureSelector
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [2]:
# Read the Wisconsin Breast Cancer Database CSV (path is relative to the
# notebook's working directory) into a dataframe
wbcd = pd.read_csv(filepath_or_buffer='WisconsinBreastCancerDatabase.csv')

In [3]:
# Select the input feature columns (scaling is done later, inside the pipelines)
# and keep the output feature as a single-column dataframe
X = wbcd.loc[:, [
    'Radius mean',
    'Texture mean',
    'Area mean',
    'Smoothness mean',
    'Compactness mean',
    'Concavity mean',
    'Concave points mean',
    'Fractal dimension mean',
    'Symmetry mean',
]]
y = wbcd.loc[:, ['Diagnosis']]

In [4]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

In [5]:
# Construct the standardizing scaler used as the first step of each pipeline
# (centering and unit-variance scaling are the library defaults, spelled out here)
scaler = StandardScaler(with_mean=True, with_std=True)

In [6]:
# Construct the base estimator: a linear-kernel support vector classifier that
# is handed to the RFE, RFECV, and sequential feature selectors below
estimator = SVC(
    kernel='linear',
)

In [7]:
# Construct a recursive feature eliminator that drops one feature per
# iteration (step=1) until five features remain
rfe = RFE(
    estimator=estimator,
    n_features_to_select=5,
    step=1,
)

In [8]:
# Construct a pipeline that scales the data and then performs RFE.
# Pipeline does not copy its steps, so the scaler is cloned here: this pipeline
# gets its own unfitted StandardScaler with the same settings, and fitting any
# other pipeline built from `scaler` cannot overwrite this one's scaling state.
pipe_rfe = Pipeline(steps=[('scaler', clone(scaler)), ('rfe', rfe)])

In [9]:
# Fit the scaler and the RFE step on the training set; the single-column label
# dataframe is flattened to a 1-D array first
pipe_rfe.fit(X_train, y_train.to_numpy().ravel())

In [10]:
# Display the names of the features kept by RFE (boolean support mask applied
# to the input column index)
X.columns[pipe_rfe.named_steps['rfe'].get_support()]

Index(['Radius mean', 'Texture mean', 'Area mean', 'Concave points mean',
       'Symmetry mean'],
      dtype='object')

In [11]:
# Display the classification score on the held-out test set.
# y_test is a single-column dataframe, so it is flattened with np.ravel to
# match the 1-D label form that was passed to fit.
pipe_rfe.score(X_test, np.ravel(y_test))

0.9385964912280702

In [12]:
# Construct a recursive feature eliminator that uses 4-fold cross-validation,
# removing one feature per iteration (step=1)
rfecv = RFECV(
    estimator=estimator,
    cv=4,
    step=1,
)

In [13]:
# Construct a pipeline that scales the data and then performs RFECV.
# Pipeline does not copy its steps, so the scaler is cloned here: this pipeline
# gets its own unfitted StandardScaler with the same settings, and fitting any
# other pipeline built from `scaler` cannot overwrite this one's scaling state.
pipe_rfecv = Pipeline(steps=[('scaler', clone(scaler)), ('rfecv', rfecv)])

In [14]:
# Fit the scaler and the RFECV step on the training set; the single-column
# label dataframe is flattened to a 1-D array first
pipe_rfecv.fit(X_train, y_train.to_numpy().ravel())

In [15]:
# Display the names of the features kept by RFECV (boolean support mask applied
# to the input column index)
X.columns[pipe_rfecv.named_steps['rfecv'].get_support()]

Index(['Radius mean', 'Texture mean', 'Area mean', 'Smoothness mean',
       'Concave points mean', 'Symmetry mean'],
      dtype='object')

In [16]:
# Display the classification score on the held-out test set.
# y_test is a single-column dataframe, so it is flattened with np.ravel to
# match the 1-D label form that was passed to fit.
pipe_rfecv.score(X_test, np.ravel(y_test))

0.9473684210526315

In [17]:
# Construct a backward sequential feature selector (10-fold cross-validation)
# around the shared base estimator, plus a separate linear-kernel SVC that
# serves as the final model of the pipeline.
# NOTE(review): n_features_to_select is left at the library default, whose
# meaning depends on the scikit-learn version in use — confirm.
sfs = SequentialFeatureSelector(
    estimator=estimator,
    direction='backward',
    cv=10,
)
model = SVC(
    kernel='linear',
)

In [18]:
# Construct a pipeline that scales the data, performs backward SFS, and then
# fits the final SVC model on the selected features.
# Pipeline does not copy its steps, so the scaler is cloned here: this pipeline
# gets its own unfitted StandardScaler with the same settings, and fitting any
# other pipeline built from `scaler` cannot overwrite this one's scaling state.
pipe_sfs = Pipeline(steps=[('scaler', clone(scaler)), ('sfs', sfs), ('model', model)])

In [19]:
# Fit the scaler, the SFS step, and the final SVC on the training set; the
# single-column label dataframe is flattened to a 1-D array first
pipe_sfs.fit(X_train, y_train.to_numpy().ravel())



In [20]:
# Display the names of the features kept by the sequential selector (boolean
# support mask applied to the input column index)
X.columns[pipe_sfs.named_steps['sfs'].get_support()]

Index(['Texture mean', 'Area mean', 'Smoothness mean', 'Concave points mean'], dtype='object')

In [21]:
# Display the classification score of the final SVC on the held-out test set.
# y_test is a single-column dataframe, so it is flattened with np.ravel to
# match the 1-D label form that was passed to fit.
pipe_sfs.score(X_test, np.ravel(y_test))

0.9649122807017544