In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import numpy as np

# Load the Breast Cancer dataset and unpack the feature matrix and labels.
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build the forest and fit it on the training portion only
# (fit() returns the estimator itself, so the two steps can be chained).
classifier = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# One importance score per feature, aligned with the dataset's feature names.
feature_importances = classifier.feature_importances_
feature_names = data.feature_names

# argsort orders ascending; reversing it puts the most important feature first.
ascending_order = np.argsort(feature_importances)
important_indices = ascending_order[::-1]

# Reorder names and scores together so they can be walked in lockstep.
ranked_names = feature_names[important_indices]
ranked_scores = feature_importances[important_indices]

# Report every feature with its score, most important first.
print("Feature Importances:")
for name, score in zip(ranked_names, ranked_scores):
    print(f"{name}: {score:.4f}")


Feature Importances:
worst area: 0.1539
worst concave points: 0.1447
mean concave points: 0.1062
worst radius: 0.0780
mean concavity: 0.0680
worst perimeter: 0.0671
mean perimeter: 0.0533
mean radius: 0.0487
mean area: 0.0476
worst concavity: 0.0318
area error: 0.0224
worst texture: 0.0217
worst compactness: 0.0203
radius error: 0.0201
mean compactness: 0.0139
mean texture: 0.0136
perimeter error: 0.0113
worst smoothness: 0.0106
worst symmetry: 0.0101
concavity error: 0.0094
mean smoothness: 0.0073
fractal dimension error: 0.0053
compactness error: 0.0053
worst fractal dimension: 0.0052
texture error: 0.0047
smoothness error: 0.0043
symmetry error: 0.0040
mean fractal dimension: 0.0039
mean symmetry: 0.0038
concave points error: 0.0035


In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE

# Load the Breast Cancer dataset and unpack the feature matrix and labels.
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Base estimator that RFE refits while it eliminates features.
classifier = RandomForestClassifier(random_state=42)

# Number of features RFE should keep.
num_features_to_select = 10  # You can adjust this value

# Configure RFE around the classifier and fit it on the training data
# (fit() returns the selector itself, so the two steps can be chained).
rfe = RFE(
    estimator=classifier,
    n_features_to_select=num_features_to_select,
).fit(X_train, y_train)

# Per-feature ranking produced by the elimination procedure.
feature_ranking = rfe.ranking_

# support_ is used as a mask over the feature names to pick the kept ones.
support_mask = rfe.support_
selected_features = data.feature_names[support_mask]

# List the retained features, one per line.
print("Selected Features:")
for feature_name in selected_features:
    print(feature_name)


Selected Features:
mean texture
mean perimeter
mean concavity
mean concave points
worst radius
worst texture
worst perimeter
worst area
worst concavity
worst concave points
