In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import GridSearchCV

In [2]:
# Load the breast-cancer dataset provided by scikit-learn and unpack the pieces
# used later: feature matrix, class labels and feature (column) names.
data = load_breast_cancer()
X, y = data.data, data.target
feature_names = data.feature_names

In [3]:
# Keep 20% of the samples aside as a test set; the fixed random_state makes
# the split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

### Random Forest

In [4]:
# Base random forest with default hyperparameters; only the random seed is fixed.
# It serves as the estimator template for the grid search that follows.
rf_classifier = RandomForestClassifier(random_state=1)

In [5]:
# Hyperparameter grid explored by the grid search.
#
# The grid is deliberately coarse. Every combination is refitted once per
# cross-validation fold, so an exhaustive sweep (100 forest sizes x 10 depths =
# 1000 candidates, i.e. 5000 forest fits with cv=5) runs so long that the
# search ends up being interrupted by hand. 8 forest sizes x 5 depths = 40
# candidates stay within the same value ranges at a fraction of the cost.
# Plain Python ints are used so that `best_params_` prints cleanly.
param_grid = {
    'n_estimators': list(range(25, 201, 25)),  # 25, 50, ..., 200 trees
    'max_depth': list(range(2, 11, 2)),        # 2, 4, ..., 10 levels
}

In [6]:
# Exhaustive 5-fold cross-validated search over `param_grid`, parallelised
# with n_jobs=-1. Each grid combination is fitted once per fold, so the
# runtime grows with the number of combinations in the grid.
grid_search = GridSearchCV(
    estimator=rf_classifier,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=0,
)
grid_search.fit(X_train, y_train)

KeyboardInterrupt: 

In [None]:
# Best model selected by the grid search (GridSearchCV exposes it as
# `best_estimator_` once the search has been fitted).
best_rf_classifier = grid_search.best_estimator_

In [None]:
# Report the winning hyperparameter combination
# (the message reads "Best hyperparameters found:").
print("Melhores hiperparâmetros encontrados:", grid_search.best_params_)

In [None]:
# Rebind `rf_classifier` to the tuned model so that the prediction and
# visualization cells that use this name work with the tuned forest.
rf_classifier = best_rf_classifier

In [None]:
# Predictions of the tuned random forest on the held-out test set.
y_pred = rf_classifier.predict(X_test)

In [None]:
# Score the random-forest predictions against the test labels: overall
# accuracy plus a text report labelled with the dataset's class names.
accuracy = accuracy_score(y_test, y_pred)
classification_report_result = classification_report(
    y_test,
    y_pred,
    target_names=data.target_names,
)

In [None]:
# Display the random-forest test accuracy as the cell output.
accuracy

In [None]:
# Print the random-forest classification report (print keeps the text layout intact).
print(classification_report_result)

### Single Decision Tree

In [None]:
# Single decision tree with default hyperparameters (only the seed is fixed),
# evaluated on the same train/test split as the random forest.
dt_classifier = DecisionTreeClassifier(random_state=1)

In [None]:
# Train the tree on the training split; no hyperparameter search is done here.
dt_classifier.fit(X_train, y_train)

In [None]:
# Predictions of the single decision tree on the held-out test set.
# NOTE: this overwrites the `y_pred` produced earlier by the random forest.
y_pred = dt_classifier.predict(X_test)

In [None]:
# Score the decision-tree predictions against the test labels: overall
# accuracy plus a text report labelled with the dataset's class names.
# These names are shared with the random-forest section and get overwritten here.
accuracy = accuracy_score(y_test, y_pred)
classification_report_result = classification_report(
    y_test,
    y_pred,
    target_names=data.target_names,
)

In [None]:
# Display the decision-tree test accuracy as the cell output.
accuracy

In [None]:
# Print the decision-tree classification report (print keeps the text layout intact).
print(classification_report_result)

### Visualization

In [None]:
# Collect the depth of every individual tree in the forest, then report the mean.
depths = [
    tree.tree_.max_depth
    for tree in rf_classifier.estimators_
]
avg_depth = sum(depths) / len(depths)
print(avg_depth)

In [None]:
# Per-feature importance scores of the forest, plus the feature indices
# ordered from most to least important (ascending argsort, reversed).
importances = rf_classifier.feature_importances_
indices = np.flip(np.argsort(importances))

In [None]:
# Bar chart of the feature importances, most important feature first.
fig, ax = plt.subplots(figsize=(12, 8))
ax.set_title("Feature Importances")
ax.bar(range(X.shape[1]), importances[indices], align="center")
# One tick per feature, labelled with the feature name in sorted order (vertical text).
ax.set_xticks(range(X.shape[1]))
ax.set_xticklabels(feature_names[indices], rotation=90)
ax.set_xlim([-1, X.shape[1]])
plt.show()