In [1]:
"""
Feature Selection Techniques:
1. Recursive Feature Elimination (RFE)
2. Correlation-based Feature Selection (CFS)
3. Mutual Information
4. Lasso Regression (L1 regularization)
"""

import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression, LassoCV
from sklearn.feature_selection import RFE, mutual_info_classif
from sklearn.model_selection import train_test_split

# --- Create sample dataset ---
# Synthetic binary-classification problem: 300 rows, 10 columns, of which
# 5 are requested as informative and 2 as redundant. Seeded so reruns match.
X, y = make_classification(
    n_samples=300,
    n_features=10,
    n_informative=5,
    n_redundant=2,
    n_classes=2,
    random_state=42,
)

# One generated name per column of X, then a frame holding the features
# with the label appended as the last column, 'target'.
feature_names = [f"feature_{col}" for col in range(X.shape[1])]
df = pd.DataFrame(X, columns=feature_names).assign(target=y)

# --- 1) Recursive Feature Elimination (RFE) ---
# Logistic regression is the estimator handed to RFE, which is asked to
# keep 5 features.
model = LogisticRegression(solver='lbfgs', max_iter=2000)
rfe = RFE(estimator=model, n_features_to_select=5).fit(X, y)

# support_ is indexed in the same order as feature_names; keep the names
# whose entry is truthy.
rfe_selected = [name for name, keep in zip(feature_names, rfe.support_) if keep]

print("RFE Selected Features:", rfe_selected)

# --- 2) Correlation-based Feature Selection (CFS) ---
# Greedy filter: walk the features from most to least correlated with the
# target (by absolute value) and keep one only if its absolute correlation
# with every already-kept feature is below a threshold.
CFS_NUM_FEATURES = 5       # stop once this many features have been kept
CFS_MAX_INTERCORR = 0.7    # a candidate must stay strictly below this vs. each kept feature

corr_matrix = df.corr()
target_corr = corr_matrix['target'].drop('target').abs()  # |correlation| with target

# Candidates ordered by descending |correlation| with the target.
sorted_features = target_corr.sort_values(ascending=False)
selected_cfs = []
for f in sorted_features.index:
    # Single label-based lookup (row sf, column f) instead of chained
    # indexing; all() over an empty selection is True, so the top-ranked
    # feature is always accepted.
    if all(abs(corr_matrix.loc[sf, f]) < CFS_MAX_INTERCORR for sf in selected_cfs):
        selected_cfs.append(f)
    if len(selected_cfs) >= CFS_NUM_FEATURES:
        break

print("CFS Selected Features:", selected_cfs)

# --- 3) Mutual Information ---
# One score per column of X, computed against y with a fixed seed.
mi_scores = mutual_info_classif(X, y, random_state=42)

# Pair each name with its score, then order the pairs from highest to
# lowest score.
mi_ranking = list(zip(feature_names, mi_scores))
mi_ranking.sort(key=lambda pair: pair[1], reverse=True)

# Names of the five best-scoring features.
selected_mi = [name for name, _ in mi_ranking[:5]]

print("Mutual Information Selected Features:", selected_mi)

# --- 4) Lasso Regression for feature selection ---
# Fit a 5-fold cross-validated Lasso on the full data.
lasso = LassoCV(cv=5, random_state=42)
lasso.fit(X, y)

# Keep the features whose fitted coefficient is non-zero.
lasso_selected = [feature_names[idx] for idx in np.flatnonzero(lasso.coef_)]

print("Lasso Selected Features:", lasso_selected)

RFE Selected Features: ['feature_0', 'feature_3', 'feature_5', 'feature_7', 'feature_8']
CFS Selected Features: ['feature_0', 'feature_1', 'feature_8', 'feature_7', 'feature_5']
Mutual Information Selected Features: ['feature_9', 'feature_0', 'feature_1', 'feature_7', 'feature_8']
Lasso Selected Features: ['feature_0', 'feature_1', 'feature_3', 'feature_5', 'feature_6', 'feature_7', 'feature_8']
