In [2]:
# Variance-Based Feature Selection
from sklearn.feature_selection import VarianceThreshold

# Toy dataset: five samples, three 0/1 features each.
X = [
    [0, 0, 1],
    [0, 1, 0],
    [0, 0, 0],
    [0, 1, 1],
    [0, 1, 1],
]

# A Bernoulli (0/1) feature has variance p * (1 - p). Using p = 0.8 gives a
# threshold of roughly 0.16; features whose variance does not exceed it are
# removed.
sel = VarianceThreshold(threshold=0.8 * (1 - 0.8))

# Learn the per-feature variances, then keep only the columns that pass.
X_transformed = sel.fit(X).transform(X)

# Show the reduced feature matrix.
print(X_transformed)

[[0 1]
 [1 0]
 [0 0]
 [1 1]
 [1 1]]


In [4]:
# Univariate Statistical Feature Selection
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

# Fetch Iris as a (features, labels) pair; return_X_y=True returns the two
# arrays directly.
X, y = load_iris(return_X_y=True)

# Sanity check on the dimensions (samples, features).
print(X.shape)  # Output: (150, 4)


(150, 4)


In [5]:
# Score every feature against the labels with the chi-squared statistic and
# keep only the 2 highest-scoring columns.
X_new = SelectKBest(score_func=chi2, k=2).fit(X, y).transform(X)
print(X_new.shape)  # Output: (150, 2)

(150, 2)


In [6]:
# Model-Based Feature Selection
from sklearn.svm import LinearSVC
from sklearn.feature_selection import SelectFromModel

# Load Iris again so this section can run without relying on variables left
# behind by earlier cells.
X, y = load_iris(return_X_y=True)

# Sanity check on the dimensions (samples, features).
print(X.shape)  # Output: (150, 4)

(150, 4)


In [7]:
# L1-penalised linear SVM: the L1 penalty pushes some coefficients to zero,
# which is what makes the fitted model usable as a feature selector.
# dual=False selects the primal formulation, which the L1 penalty requires.
lsvc = LinearSVC(penalty="l1", C=0.01, dual=False)
lsvc.fit(X, y)

# prefit=True: reuse the already-fitted estimator instead of fitting it again.
model = SelectFromModel(estimator=lsvc, prefit=True)
X_new_model = model.transform(X)
print(X_new_model.shape)  # Column count = number of features the selector kept

(150, 3)


In [8]:
# Same selection as the L1 variant, but with an L2 penalty for comparison.
# L2 shrinks coefficients without zeroing them, so SelectFromModel relies on
# its default importance threshold (documented as the mean for non-L1 models).
lsvc = LinearSVC(penalty="l2", C=0.01, dual=False)
lsvc.fit(X, y)

# prefit=True: reuse the already-fitted estimator instead of fitting it again.
model = SelectFromModel(estimator=lsvc, prefit=True)
X_new_model = model.transform(X)
print(X_new_model.shape)  # Column count = number of features the selector kept

(150, 2)


In [9]:
# Tree-based Feature Selection
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectFromModel

# Load Iris once more so the tree-based section can run without relying on
# variables left behind by earlier cells.
X, y = load_iris(return_X_y=True)

# Sanity check on the dimensions (samples, features).
print(X.shape)  # Output: (150, 4)

(150, 4)


In [11]:
# Train an ExtraTreesClassifier.
# random_state is fixed because extremely randomized trees pick split
# thresholds at random; without a seed the importances printed below (and
# potentially the features selected from them) change on every run.
clf = ExtraTreesClassifier(n_estimators=50, random_state=0)
clf.fit(X, y)

# Display feature importances: one value per input column, in column order.
print(clf.feature_importances_)

[0.11198381 0.05922501 0.391778   0.43701319]


In [12]:
# Wrap the already-trained classifier in SelectFromModel; prefit=True tells
# the selector to use the fitted estimator as-is rather than refit it.
model = SelectFromModel(estimator=clf, prefit=True)

# Keep only the columns the selector judges important enough.
X_new = model.transform(X)
print(X_new.shape)  # Column count = number of features the selector kept

(150, 2)
