In [1]:
class FeatureSelector():
    """Greedy forward feature selector driven by cross-validated scores.

    Each column of ``X`` (or, for columns listed in ``use_value``, each
    distinct value of the column) is first scored on its own. Candidates
    are then tried in descending order of that score and kept only when
    they raise the cross-validated score of the features accepted so far.

    Relies on ``np``, ``cross_validate``, ``tqdm_notebook``, ``csc_matrix``
    and ``hstack`` being available in the enclosing module.

    Parameters
    ----------
    estimator : object
        Passed as the first argument to ``cross_validate``.
    cv : object
        Passed to ``cross_validate`` as ``cv``.
    metric : object
        Passed to ``cross_validate`` as ``scoring``.
    use_recursion : bool
        When true, rejected candidates are retried in further passes until
        a whole pass accepts nothing new.
    fill_value : scalar or None
        Replacement for NaN entries of ``X``; ``None`` leaves NaN as is.
    use_value : collection of int or None
        Column indices whose individual values are evaluated as separate
        candidates. ``None`` means every column is evaluated as a whole.
    baseline_score : float, default 0.5
        Score a candidate set must beat before anything is selected.
        NOTE(review): 0.5 looks chosen for AUC-like metrics; pick another
        baseline for metrics on a different scale.

    Attributes set by ``fit``
    -------------------------
    scores : list of (column, value or None, mean CV score)
    best_features : list of int, whole columns that were accepted
    selected_values : dict mapping column -> list of accepted values
    best_score : float, best mean CV score reached
    to_drop : list of (column, value or None) rejected in the first pass
    to_drop_before, to_drop_after : only set when ``use_recursion`` is
        true; the candidates entering / rejected by the last retry pass.
    """

    def __init__(self, estimator, cv, metric, use_recursion, fill_value, use_value,
                 baseline_score=.5):
        self.estimator = estimator
        self.cv = cv
        self.use_recursion = use_recursion
        self.metric = metric
        self.fill_value = fill_value
        self.use_value = use_value
        self.baseline_score = baseline_score

    def _fill_missing(self, X):
        """Return ``X`` with NaN replaced by ``fill_value``.

        The caller's array is never modified: a copy is made only when
        there is something to fill. Inputs that cannot be NaN-checked
        (non-ndarray containers, non-numeric dtypes) are returned as is.
        Errors caused by a ``fill_value`` that does not fit the array's
        dtype are allowed to propagate.
        """
        if self.fill_value is None or not isinstance(X, np.ndarray):
            return X
        try:
            missing = np.isnan(X)
        except TypeError:
            # np.isnan is undefined for this dtype (e.g. object/str arrays).
            return X
        if not missing.any():
            return X
        filled = X.copy()
        filled[missing] = self.fill_value
        return filled

    def _cv_score(self, X, y):
        """Return the mean ``test_score`` of ``cross_validate`` on ``X``, ``y``."""
        result = cross_validate(self.estimator, X, y,
                                scoring=self.metric, cv=self.cv)
        return result['test_score'].mean()

    def _design_matrix(self, X, selected_values, features):
        """Build the matrix for a trial set of value- and whole-features.

        For every column in ``selected_values`` the entries that are not
        among the selected values are replaced by -1; the whole columns in
        ``features`` are appended unchanged.
        """
        encoded = [
            np.where(np.isin(X[:, column], values), X[:, column], -1)
            for column, values in selected_values.items()
            if values  # a column with no accepted value contributes nothing
        ]
        parts = []
        if encoded:
            parts.append(np.column_stack(encoded))
        if features:
            parts.append(X[:, features])
        if len(parts) == 1:
            return parts[0]
        return csc_matrix(hstack(parts))

    def _greedy_pass(self, X, y, candidates, selected_values):
        """Try each candidate once and return the ones that were rejected.

        A candidate is accepted when adding it to the currently accepted
        features strictly improves ``self.best_score``. Accepted value
        candidates are written into ``selected_values`` (mutated in place);
        accepted whole columns are appended to ``self.best_features``.
        """
        rejected = []
        for column, value in tqdm_notebook(candidates):
            # Every trial starts from the accepted state only, so a
            # previously rejected candidate can never leak into the matrix.
            if value is not None:
                trial_values = dict(selected_values)
                trial_values[column] = selected_values.get(column, []) + [value]
                trial_features = self.best_features
            else:
                trial_values = selected_values
                trial_features = self.best_features + [column]

            trial_matrix = self._design_matrix(X, trial_values, trial_features)
            trial_score = self._cv_score(trial_matrix, y)

            if trial_score > self.best_score:
                self.best_score = trial_score
                if value is not None:
                    selected_values[column] = trial_values[column]
                else:
                    self.best_features = trial_features
            else:
                rejected.append((column, value))
        return rejected

    def fit(self, X, y):
        """Score every candidate, then greedily select the useful ones.

        Parameters
        ----------
        X : numpy.ndarray
            2-D feature matrix; columns are indexed as ``X[:, j]``.
        y : array-like
            Target passed to ``cross_validate``.

        Returns
        -------
        self
        """
        X = self._fill_missing(X)
        value_columns = set(self.use_value) if self.use_value is not None else set()

        # Stage 1: score every candidate on its own.
        scores = []
        for col_idx in tqdm_notebook(range(X.shape[1])):
            series = X[:, col_idx]
            if col_idx in value_columns:
                # One binary indicator candidate per distinct non-NaN value.
                for val in tqdm_notebook(np.unique(series[~np.isnan(series)])):
                    indicator = (series == val).astype(int).reshape(-1, 1)
                    scores.append((col_idx, val, self._cv_score(indicator, y)))
            else:
                scores.append((col_idx, None,
                               self._cv_score(series.reshape(-1, 1), y)))
        self.scores = scores

        # Stage 2: try candidates from the best individual score downwards.
        ranked = sorted(scores, key=lambda entry: entry[-1], reverse=True)
        order = [(column, value) for column, value, _ in ranked]

        selected_values = {}
        self.best_score = self.baseline_score
        self.best_features = []
        self.to_drop = self._greedy_pass(X, y, order, selected_values)

        # Stage 3 (optional): retry rejected candidates until a full pass
        # accepts nothing, since a candidate may only help once others
        # have been added.
        if self.use_recursion:
            self.to_drop_before = list(self.to_drop)
            while True:
                self.to_drop_after = self._greedy_pass(
                    X, y, self.to_drop_before, selected_values)
                if len(self.to_drop_after) == len(self.to_drop_before):
                    break
                self.to_drop_before = self.to_drop_after

        self.selected_values = selected_values
        return self

    def transform(self, X):
        """Return the accepted whole columns of ``X`` with NaN filled.

        Only the columns in ``best_features`` are returned; accepted
        per-value candidates (``selected_values``) are not part of the
        output. The input array is not modified.
        """
        X = self._fill_missing(X)
        return X[:, self.best_features]

    def show_scores(self):
        """Return the per-candidate scores recorded by ``fit``."""
        return self.scores

    def show_selected(self):
        """Return the indices of the accepted whole columns."""
        return self.best_features

    def show_best_score(self):
        """Return the best mean cross-validated score reached by ``fit``."""
        return self.best_score