In [12]:
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV  # Import GridSearchCV
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
import numpy as np
 
# Load the breast_cancer dataset
data = load_breast_cancer()
X, y = data.data, data.target
 
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
 
# Tune hyperparameters using GridSearchCV
param_grid = {
    'n_estimators': [50, 150, 200, 10],
    'criterion': ['gini', 'entropy', 'log_loss'],
}
 
clf = RandomForestClassifier(random_state=0)
grid_search = GridSearchCV(clf, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_



# Train a RandomForestClassifier with the best hyperparameters
best_clf = RandomForestClassifier(random_state=0, **best_params)
best_clf.fit(X_train, y_train)
num_features = best_clf.n_classes_
 
# Perform 5-fold cross-validation to get an average accuracy score
scores = cross_val_score(best_clf, X_train, y_train, cv=5)
 
# Calculate the average accuracy and accuracy per feature
avg_accuracy = np.mean(scores)
accuracy_per_feature = avg_accuracy / num_features
 
print(X.shape)
print(f"Best Hyperparameters: {best_params}")
print(f"Accuracy: {avg_accuracy:.2f}")
print(f"Accuracy per feature: {accuracy_per_feature:.2f}")
print(f"Number of features: {num_features}")

(569, 30)
Best Hyperparameters: {'criterion': 'entropy', 'n_estimators': 150}
Accuracy: 0.96
Accuracy per feature: 0.48
Number of features: 2
