In [6]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestClassifier

# --- Data -------------------------------------------------------------------
# Fetch the Breast Cancer dataset and pull the feature matrix and labels apart.
data = load_breast_cancer()
X = data.data
y = data.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# --- Model ------------------------------------------------------------------
# Build a seeded 100-tree random forest and train it on the training split.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# --- Feature selection ------------------------------------------------------
# prefit=True tells the selector that `model` is already fitted, so the selector
# itself is never fitted here; the cut-off is the "mean" threshold applied to
# the forest's feature importances.
selector = SelectFromModel(estimator=model, threshold="mean", prefit=True)

# Reduce both splits with the same selector (train first, then test).
X_train_selected, X_test_selected = (
    selector.transform(split) for split in (X_train, X_test)
)

# --- Report -----------------------------------------------------------------
# The first line reports how many columns survived the selection (a count, not
# the feature names); the second dumps the raw per-feature importances.
print("Selected Features (SelectFromModel):", X_train_selected.shape[1])
print("Feature Importances:", model.feature_importances_)


Selected Features (SelectFromModel): 9
Feature Importances: [0.03231189 0.0110639  0.06009233 0.05381045 0.00622336 0.00921566
 0.08055702 0.14193444 0.00327807 0.00314028 0.01643496 0.00317191
 0.01176976 0.02953842 0.00588079 0.00459638 0.0058159  0.00338232
 0.00400077 0.00713457 0.07797475 0.01878567 0.07429212 0.11821686
 0.01176917 0.01753909 0.04107958 0.12713638 0.01292945 0.00692376]


In [11]:
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split

# --- Data -------------------------------------------------------------------
# Fetch the Breast Cancer dataset and pull the feature matrix and labels apart.
data = load_breast_cancer()
X = data.data
y = data.target

# Same 70/30 seeded split as used for the SelectFromModel experiment.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# --- Model ------------------------------------------------------------------
# Base estimator handed to RFE; the generous iteration cap gives the solver
# room to converge.
# NOTE(review): the features are passed in unscaled. If RFE ranks features by
# coefficient magnitude, the ranking may depend on each feature's scale --
# consider standardizing first; confirm against the scikit-learn docs.
model = LogisticRegression(max_iter=4000, random_state=42)

# --- Recursive feature elimination -------------------------------------------
# Fit RFE on the training split only, keeping 10 features, then project both
# splits through the fitted selector so the test set sees the same columns.
rfe = RFE(estimator=model, n_features_to_select=10)
rfe.fit(X_train, y_train)
X_train_rfe = rfe.transform(X_train)
X_test_rfe = rfe.transform(X_test)

# --- Report -----------------------------------------------------------------
# Number of retained columns, the per-feature ranking array, and the boolean
# mask marking which of the original columns were kept.
print("Selected Features (RFE):", X_train_rfe.shape[1])
print("Selected Feature Rankings:", rfe.ranking_)
print("Features selected (boolean mask):", rfe.support_)


Selected Features (RFE): 10
Selected Feature Rankings: [ 1  4 10 17  6  1  1  1  5 14 12  2  1  8 18 19 13 16 15 21  9  3  7 20
  1  1  1  1  1 11]
Features selected (boolean mask): [ True False False False False  True  True  True False False False False
  True False False False False False False False False False False False
  True  True  True  True  True False]
