In [1]:
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency

In [2]:
class SelectKBest:
    """Keep the ``k`` columns of a DataFrame with the highest univariate scores.

    Parameters
    ----------
    score_func : callable
        Called as ``score_func(column, y)`` once per column of ``X``; must
        return a single number where larger means "more relevant".
    k : int or 'all', default='all'
        Number of top-scoring columns to keep. ``'all'`` keeps every column.
        Values larger than the number of columns keep every column.

    Attributes (set by ``fit``)
    ---------------------------
    scores_ : ndarray of shape (n_features,)
        Score of every column, in column order.
    selected_indices_ : ndarray of int
        Positional indices of the kept columns, ordered by ascending score.
    selected_features_ : pandas.Index
        Column labels matching ``selected_indices_``.
    """

    def __init__(self, score_func, k='all'):
        self.score_func = score_func
        self.k = k
        self.scores_ = None
        self.selected_indices_ = None
        self.selected_features_ = None

    def _resolve_k(self, n_features):
        """Translate the ``k`` hyperparameter into a concrete column count."""
        if isinstance(self.k, str):
            if self.k != 'all':
                raise ValueError(f"k must be a non-negative integer or 'all', got {self.k!r}")
            return n_features
        n_keep = int(self.k)
        if n_keep < 0:
            raise ValueError(f"k must be a non-negative integer or 'all', got {self.k!r}")
        return min(n_keep, n_features)

    def fit(self, X, y):
        """Score every column of the DataFrame ``X`` against ``y`` and pick the best.

        ``X`` must be a pandas DataFrame (columns are accessed through
        ``.iloc`` and ``.columns``). ``y`` may be any array-like; it is
        flattened to 1-D before being handed to ``score_func``.

        Returns ``self``. Raises ``ValueError`` for an invalid ``k``.
        """
        # np.asarray first: works for lists and avoids the deprecated Series.ravel().
        y = np.asarray(y).ravel()
        n_features = X.shape[1]
        self.scores_ = np.array(
            [self.score_func(X.iloc[:, i], y) for i in range(n_features)]
        )

        # Resolve k locally so the hyperparameter itself is never overwritten;
        # otherwise a refit on data with a different width would reuse a stale count.
        n_keep = self._resolve_k(n_features)

        # Slice from an explicit start index: `[-n_keep:]` would return *every*
        # column when n_keep == 0.
        ascending = np.argsort(self.scores_)
        self.selected_indices_ = ascending[n_features - n_keep:]
        self.selected_features_ = X.columns[self.selected_indices_]

        return self

    def transform(self, X):
        """Return the columns of the DataFrame ``X`` chosen by the last ``fit``."""
        return X.iloc[:, self.selected_indices_]

    def fit_transform(self, X, y):
        """Fit on ``(X, y)`` and return the selected columns of ``X``."""
        return self.fit(X, y).transform(X)

def chi2_score(x, y):
    """Chi-squared test statistic for the association between ``x`` and ``y``.

    Both inputs are treated as categorical: they are cross-tabulated with
    ``pandas.crosstab`` and the resulting table of counts is passed to
    ``scipy.stats.chi2_contingency`` with its default settings. Only the test
    statistic is returned; the p-value, degrees of freedom and expected
    frequencies are discarded.
    """
    return chi2_contingency(pd.crosstab(x, y))[0]

In [3]:
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils import check_X_y, check_array

In [4]:
class RFE:
    """Recursive feature elimination.

    Repeatedly fits ``estimator`` on the surviving features and drops the one
    with the smallest importance until ``n_features_to_select`` remain.

    Parameters
    ----------
    estimator : object
        Must implement ``fit(X, y)`` and expose either ``feature_importances_``
        or ``coef_`` afterwards. It is fitted in place (not cloned).
    n_features_to_select : int, default=1
        Number of features to keep.

    Attributes (set by ``fit``)
    ---------------------------
    support_ : ndarray of int
        Positional indices (into the original columns) of the kept features,
        in ascending order.
    ranking_ : ndarray of int, shape (n_features,)
        ``0`` for kept features. Eliminated features get a positive rank, with
        larger values meaning "eliminated earlier".
    """

    def __init__(self, estimator, n_features_to_select=1):
        self.estimator = estimator
        self.n_features_to_select = n_features_to_select
        self.support_ = None
        self.ranking_ = None

    def _current_importances(self):
        """Return one non-negative importance per feature of the last fit."""
        if hasattr(self.estimator, 'feature_importances_'):
            return np.asarray(self.estimator.feature_importances_)
        # coef_ is (n_features,) for single-output models but
        # (n_classes, n_features) for multiclass linear models; collapse the
        # class axis so argmin always yields a column index.
        return np.atleast_2d(np.abs(self.estimator.coef_)).sum(axis=0)

    def fit(self, X, y):
        """Run the elimination loop on ``(X, y)`` and return ``self``.

        Raises ``ValueError`` if ``n_features_to_select`` is negative.
        """
        X, y = check_X_y(X, y)
        n_keep = self.n_features_to_select
        if n_keep < 0:
            raise ValueError(
                f"n_features_to_select must be non-negative, got {n_keep!r}"
            )

        n_features = X.shape[1]
        # `remaining` maps positions in the shrinking matrix back to the
        # original column indices. Without it, the argmin position refers to
        # the reduced matrix and marks the wrong original feature once any
        # column has been removed.
        remaining = np.arange(n_features)
        ranking = np.zeros(n_features, dtype=int)

        while remaining.size > n_keep:
            self.estimator.fit(X[:, remaining], y)
            worst = int(np.argmin(self._current_importances()))
            ranking[remaining[worst]] = remaining.size - n_keep
            remaining = np.delete(remaining, worst)

        self.ranking_ = ranking
        self.support_ = remaining
        return self

    def transform(self, X):
        """Return only the selected columns of ``X``.

        Accepts a pandas DataFrame (returns a DataFrame) or any 2-D
        array-like (returns an ndarray). Raises ``ValueError`` if called
        before ``fit``.
        """
        if self.support_ is None:
            raise ValueError("This RFE instance is not fitted yet; call fit() first.")
        if hasattr(X, 'iloc'):
            return X.iloc[:, self.support_]
        return np.asarray(X)[:, self.support_]

    def fit_transform(self, X, y):
        """Fit on ``(X, y)`` and return the selected columns of ``X``."""
        return self.fit(X, y).transform(X)

In [5]:
from sklearn.linear_model import Lasso
from sklearn.utils import check_X_y

In [6]:
class L1FeatureSelector:
    """Select the features that receive a non-zero coefficient from a Lasso fit.

    Parameters
    ----------
    alpha : float, default=1.0
        Passed to ``Lasso(alpha=...)`` when the selector is constructed.

    Attributes
    ----------
    model : Lasso
        The underlying estimator, created in ``__init__``.
    selected_features_ : ndarray of int or None
        Positional indices of columns whose fitted coefficient is non-zero;
        ``None`` until ``fit`` has been called.
    """

    def __init__(self, alpha=1.0):
        self.alpha = alpha
        self.model = Lasso(alpha=self.alpha)
        self.selected_features_ = None

    def fit(self, X, y):
        """Fit the Lasso model on ``(X, y)`` and record the non-zero columns."""
        X, y = check_X_y(X, y)
        self.model.fit(X, y)
        self.selected_features_ = np.flatnonzero(self.model.coef_)
        return self

    def transform(self, X):
        """Return only the selected columns of ``X``.

        Accepts a pandas DataFrame (returns a DataFrame) or any 2-D
        array-like (returns an ndarray), matching what ``fit`` accepts.
        Raises ``ValueError`` if called before ``fit``.
        """
        # Explicit guard: indexing an ndarray with None would silently add an
        # axis instead of failing.
        if self.selected_features_ is None:
            raise ValueError(
                "This L1FeatureSelector instance is not fitted yet; call fit() first."
            )
        if hasattr(X, 'iloc'):
            return X.iloc[:, self.selected_features_]
        return np.asarray(X)[:, self.selected_features_]

    def fit_transform(self, X, y):
        """Fit on ``(X, y)`` and return the selected columns of ``X``."""
        return self.fit(X, y).transform(X)

In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils import check_X_y

In [8]:
class TreeBasedFeatureSelector:
    """Select the features with strictly positive random-forest importance.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of trees, forwarded to ``RandomForestClassifier``.
    random_state : int, RandomState instance or None, default=None
        Forwarded to ``RandomForestClassifier`` so the selection can be made
        reproducible. ``None`` keeps the previous unseeded behaviour.

    Attributes
    ----------
    model : RandomForestClassifier
        The underlying estimator, created in ``__init__``.
    selected_features_ : ndarray of int or None
        Positional indices of columns whose importance is greater than zero;
        ``None`` until ``fit`` has been called.
    """

    def __init__(self, n_estimators=100, random_state=None):
        self.n_estimators = n_estimators
        self.random_state = random_state
        self.model = RandomForestClassifier(
            n_estimators=self.n_estimators,
            random_state=self.random_state,
        )
        self.selected_features_ = None

    def fit(self, X, y):
        """Fit the forest on ``(X, y)`` and record the columns it actually used."""
        X, y = check_X_y(X, y)
        self.model.fit(X, y)
        importances = self.model.feature_importances_
        self.selected_features_ = np.where(importances > 0)[0]
        return self

    def transform(self, X):
        """Return only the selected columns of ``X``.

        Accepts a pandas DataFrame (returns a DataFrame) or any 2-D
        array-like (returns an ndarray), matching what ``fit`` accepts.
        Raises ``ValueError`` if called before ``fit``.
        """
        # Explicit guard: indexing an ndarray with None would silently add an
        # axis instead of failing.
        if self.selected_features_ is None:
            raise ValueError(
                "This TreeBasedFeatureSelector instance is not fitted yet; call fit() first."
            )
        if hasattr(X, 'iloc'):
            return X.iloc[:, self.selected_features_]
        return np.asarray(X)[:, self.selected_features_]

    def fit_transform(self, X, y):
        """Fit on ``(X, y)`` and return the selected columns of ``X``."""
        return self.fit(X, y).transform(X)

In [9]:
# Sample Data
import pandas as pd
from sklearn.datasets import load_iris

# Fixed seed so the random-forest-driven RFE result is the same on every
# Restart & Run All.
SEED = 42

data = load_iris()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

# Univariate Feature Selection
# chi2_score cross-tabulates each column against y, so every distinct
# measurement value is treated as its own category.
selector_kbest = SelectKBest(score_func=chi2_score, k=2)
X_kbest = selector_kbest.fit_transform(X, y)
print("Selected Features (KBest):", selector_kbest.selected_features_)

# Recursive Feature Elimination
rfe_selector = RFE(estimator=RandomForestClassifier(random_state=SEED), n_features_to_select=2)
X_rfe = rfe_selector.fit_transform(X, y)
print("Selected Features (RFE):", X.columns[rfe_selector.support_])

# L1-Based Feature Selection
# NOTE(review): L1FeatureSelector fits a Lasso *regressor*, so the iris class
# labels in y are used as a numeric regression target here.
l1_selector = L1FeatureSelector(alpha=0.1)
X_l1 = l1_selector.fit_transform(X, y)
print("Selected Features (L1):", X.columns[l1_selector.selected_features_])

# Tree-Based Feature Selection
tree_selector = TreeBasedFeatureSelector()
X_tree = tree_selector.fit_transform(X, y)
print("Selected Features (Tree-Based):", X.columns[tree_selector.selected_features_])

Selected Features (KBest): Index(['petal width (cm)', 'petal length (cm)'], dtype='object')
Selected Features (RFE): Index(['petal length (cm)', 'petal width (cm)'], dtype='object')
Selected Features (L1): Index(['petal length (cm)'], dtype='object')
Selected Features (Tree-Based): Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)'],
      dtype='object')
