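This notebook defines wrapper classes for feature selection. Each wrapper is fitted on a pandas DataFrame, records the names of the columns to keep and to drop, and returns the kept columns as a DataFrame from `transform`.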

The classes below may need to be rewritten using inheritance and the `super` function.

In [2]:
import pandas as pd
import numpy  as np

In [3]:
import numpy as np
def variance_scorer(X, y=None):
  """Score each column of the DataFrame `X` by its variance.

  Parameters
  ----------
  X : pandas DataFrame whose columns are the candidate features.
  y : ignored; accepted so the function has the `(X, y)` scorer signature.

  Returns
  -------
  tuple `(scores, pvalues)` where `scores` is the result of applying
  `np.var` to `X` with `DataFrame.apply` and `pvalues` is always None.
  """
  column_variances = X.apply(np.var)
  no_pvalues       = None  # a variance score has no associated p-value
  return (column_variances, no_pvalues)

## 2. `SelectKBestDF`

In [5]:
from sklearn.feature_selection import SelectKBest
from sklearn.base              import BaseEstimator,TransformerMixin

class SelectKBestDF(BaseEstimator,TransformerMixin):
  """Keep the `k` highest-scoring columns of a pandas DataFrame.

  Parameters
  ----------
  score_func : callable `score_func(X, y)` returning either a
               `(scores, pvalues)` pair (list or tuple) or the scores alone,
               with one score per column of `X`.
  k          : number of columns to keep, or the string 'all' to keep every
               column.

  Attributes set by `fit`
  -----------------------
  scores_      : scores exactly as returned by `score_func`.
  pvalues_     : p-values returned by `score_func`, or None.
  columns_keep : kept column names, best score first.
  columns_drop : remaining column names, in their original column order.
  """

  def __init__(self, score_func, k=10):
    self.score_func = score_func
    self.k          = k

  def fit(self, X, y=None):
    """Score the columns of `X` and record which ones to keep and drop."""
    score_result = self.score_func(X,y)
    if isinstance(score_result, (list, tuple)):
      (self.scores_,
       self.pvalues_
      ) = score_result
    else:
      # Score functions that return only the scores (no p-values).
      self.scores_  = score_result
      self.pvalues_ = None

    # Rank on a float copy so the stored scores_ stay untouched.
    # argsort places NaN last, so after the reversal below NaN scores would be
    # ranked as the best features; push them to the bottom instead.
    rank_scores = np.array(self.scores_, dtype=float).ravel()
    rank_scores[np.isnan(rank_scores)] = -np.inf

    n_keep            = None if (isinstance(self.k, str) and self.k == 'all') else self.k
    var_nam           = list(X.columns)
    # A stable sort makes the order of tied scores reproducible.
    var_ndx_sorted    = rank_scores.argsort(kind='stable')[::-1][:n_keep]
    self.columns_keep = [var_nam[i] for i in var_ndx_sorted]
    keep_set          = set(self.columns_keep)
    # List comprehension (not a set difference) keeps the order deterministic.
    self.columns_drop = [c for c in var_nam if c not in keep_set]
    return self

  def transform(self, X):
    """Return the kept columns of `X`, ordered from best to worst score."""
    return X[self.columns_keep]

## 3. `SelectFromModelDF`

In [7]:
from sklearn.feature_selection import SelectFromModel
from sklearn.base              import BaseEstimator,TransformerMixin

class SelectFromModelDF(BaseEstimator,TransformerMixin):
  """Keep the `k` most important columns of a DataFrame according to a model.

  The estimator is fitted on `(X, y)`; columns are then ranked by the
  magnitude of its `coef_` (preferred) or `feature_importances_`.

  Parameters
  ----------
  estimator : object with `fit(X, y)` that exposes `coef_` or
              `feature_importances_` once fitted. It is fitted in place.
  k         : number of columns to keep.

  Attributes set by `fit`
  -----------------------
  columns_keep : kept column names, most important first.
  columns_drop : remaining column names, in their original column order.
  """

  def __init__(self, estimator, k=10):
    self.estimator = estimator
    self.k         = k

  def fit(self, X, y=None):
    """Fit the estimator and record which columns to keep and drop.

    Raises
    ------
    ValueError     : if `y` is None, or the number of importances does not
                     match the number of columns in `X`.
    AttributeError : if the fitted estimator has neither `coef_` nor
                     `feature_importances_`.
    """
    if y is None:
      raise ValueError('SelectFromModelDF.fit: y is required to fit the estimator')
    # Handles numpy arrays, pandas DataFrames/Series and plain sequences alike.
    y_1d = np.asarray(y).ravel()

    self.estimator.fit(X,y_1d)

    if hasattr(self.estimator, 'coef_'):
      weights = self.estimator.coef_
    elif hasattr(self.estimator, 'feature_importances_'):
      weights = self.estimator.feature_importances_
    else:
      raise AttributeError('feature estimator has no coef_ or feature_importances_ attributes')

    # Rank by magnitude so strongly negative coefficients are not ranked last.
    # A 2-D array (e.g. one row of coefficients per class) is reduced to one
    # value per column by summing the absolute values over the rows.
    importance = np.abs(np.atleast_2d(np.asarray(weights, dtype=float))).sum(axis=0)

    var_nam = list(X.columns)
    if importance.shape[0] != len(var_nam):
      raise ValueError('SelectFromModelDF.fit: got %d importances for %d columns'
                       % (importance.shape[0], len(var_nam)))

    # A stable sort makes the order of tied importances reproducible.
    var_ndx_sorted    = importance.argsort(kind='stable')[::-1][:self.k]
    self.columns_keep = [var_nam[i] for i in var_ndx_sorted]
    keep_set          = set(self.columns_keep)
    # List comprehension (not a set difference) keeps the order deterministic.
    self.columns_drop = [c for c in var_nam if c not in keep_set]
    return self

  def transform(self, X):
    """Return the kept columns of `X`, most important first."""
    return X[self.columns_keep]

## 4. `RFEDF`

In [9]:
from sklearn.feature_selection import RFE
from sklearn.base              import BaseEstimator,TransformerMixin
from itertools                 import compress

class RFEDF(BaseEstimator,TransformerMixin):
  """Recursive feature elimination (`RFE`) that keeps DataFrame column names.

  Parameters
  ----------
  estimator : estimator handed to `RFE`.
  k         : number of columns to keep (`n_features_to_select` of `RFE`).
  step      : `step` argument of `RFE`.
  debug     : when True, print progress information.

  Attributes
  ----------
  rfe          : the wrapped `RFE` object.
  columns_keep : (after `fit`) names of the columns ranked 1 by `RFE`.
  columns_drop : (after `fit`) names of the other columns, best rank first.
  """

  def __init__(self, estimator, k=10, step=1, debug=False):
    self.estimator = estimator
    self.k         = k
    # Every __init__ argument must be stored under its own name, otherwise
    # get_params() (and therefore clone() and repr()) raises AttributeError.
    self.step      = step
    self.debug     = debug
    self.rfe       = RFE(estimator=estimator,
                         n_features_to_select=k,
                         step=step
                        )

    if self.debug:
      print('k RFEDF.init',self.k)

  def fit(self, X, y=None):
    """Run `RFE` on `(X, y)` and record which columns to keep and drop.

    Raises
    ------
    ValueError : if `y` is None.
    """
    if y is None:
      raise ValueError('RFEDF.fit: y is required to fit the estimator')
    # Handles numpy arrays, pandas DataFrames/Series and plain sequences alike.
    y_1d = np.asarray(y).ravel()

    # Push the current parameters into the wrapped RFE so that values changed
    # through set_params() after construction (e.g. by a grid search) are used.
    self.rfe.set_params(estimator=self.estimator,
                        n_features_to_select=self.k,
                        step=self.step
                       )
    self.rfe.fit(X,y_1d)

    var_nam = list(X.columns)
    ranking = np.asarray(self.rfe.ranking_)

    if self.debug:
      print('RFEDF.fit.ranking',self.rfe.ranking_)
      print('RFEDF.fit.columns',var_nam)

    # A stable sort keeps tied ranks in their original column order.
    rank_order           = np.argsort(ranking, kind='stable')
    self.columns_keep    = [var_nam[i] for i in rank_order if ranking[i]==1]
    self.columns_drop    = [var_nam[i] for i in rank_order if ranking[i] >1]

    if self.debug:
      print('RFEDF.fit.columns_keep',self.columns_keep)
      print('RFEDF.fit.columns_drop',self.columns_drop)
    return self

  def transform(self, X):
    """Return the kept columns of `X`."""
    return X[self.columns_keep]

## 5. `SFSDF`

In [11]:
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.base              import BaseEstimator,TransformerMixin

class SFSDF(BaseEstimator,TransformerMixin):
  """Sequential feature selection (mlxtend `SFS`) that keeps DataFrame column names.

  Parameters
  ----------
  est_obj : estimator handed to `SFS`.
  k       : `k_features` argument of `SFS`.
  step    : not used by the selection; accepted for signature parity with
            the other wrappers and stored so parameter introspection works.
  debug   : when True, print progress information.

  Attributes
  ----------
  sfs          : the wrapped `SFS` object.
  columns_keep : (after `fit`) feature names selected by `SFS`.
  columns_drop : (after `fit`) remaining column names, in original order.
  """

  def __init__(self, est_obj, k=10, step=1, debug=False):
    self.est_obj = est_obj
    self.k       = k
    # Every __init__ argument must be stored under its own name, otherwise
    # get_params() (and therefore clone() and repr()) raises AttributeError.
    self.step    = step
    self.debug   = debug
    self.sfs     = SFS(estimator=est_obj,
                       k_features=k
                      )

    if self.debug:
      print('k SFSDF init', self.k)

  def fit(self, X, y=None):
    """Run `SFS` on `(X, y)` and record which columns to keep and drop.

    Raises
    ------
    ValueError : if `y` is None.
    """
    if y is None:
      raise ValueError('SFSDF fit: y is required to fit the estimator')
    # Handles numpy arrays, pandas DataFrames/Series and plain sequences alike.
    y_1d = np.asarray(y).ravel()

    # Rebuild the wrapped selector only when est_obj or k no longer match it
    # (e.g. after set_params() in a grid search); otherwise leave self.sfs
    # alone so that settings applied to it directly are preserved.
    if (getattr(self.sfs, 'estimator', None) is not self.est_obj
        or getattr(self.sfs, 'k_features', None) != self.k):
      self.sfs = SFS(estimator=self.est_obj,
                     k_features=self.k
                    )

    self.sfs.fit(X,y_1d)

    var_nam = list(X.columns)

    if self.debug:
      print('SFSDF fit X.columns',var_nam)

    self.columns_keep    = list(self.sfs.k_feature_names_)
    keep_set             = set(self.columns_keep)
    # List comprehension (not a set difference) keeps the order deterministic.
    self.columns_drop    = [c for c in var_nam if c not in keep_set]

    if self.debug:
      print('SFSDF columns_keep',self.columns_keep)
      print('SFSDF columns_drop',self.columns_drop)
    return self

  def transform(self, X):
    """Return the kept columns of `X`."""
    return X[self.columns_keep]

__The End__