In [9]:
#Import scikit-learn dataset library
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import precision_score, confusion_matrix, accuracy_score
import pandas as pd

# Load scikit-learn's built-in breast cancer dataset
cancer = datasets.load_breast_cancer()

# Split dataset into training set and test set.
# test_size=0.2 holds out 20% of the samples, i.e. 80% training and 20% test;
# random_state=1 fixes the shuffle so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, test_size=0.2, random_state=1
)

In [12]:
# Hyperparameter grid for the random forest: 1 x 1 x 3 x 2 = 6 candidate settings
param_grid = dict(
    n_estimators=[200],
    max_features=['log2'],
    max_depth=[7, 8, 9],
    criterion=['gini', 'entropy'],
)

# Exhaustive search over the grid, evaluating each candidate with 5-fold cross-validation
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
)
grid_search.fit(X_train, y_train)

# Predict on the held-out test set and compute the evaluation metrics
y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)

# Flatten the confusion matrix into its four cells;
# specificity is the true-negative rate tn / (tn + fp)
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
specificity = tn / (tn + fp)

# Report the selected hyperparameters, then the test-set metrics
print("Best Hyperparameters:", grid_search.best_params_)
print("Accuracy score: ", accuracy)
print("Precision: ", precision)
print("Specificity: ",specificity)

Best Hyperparameters: {'criterion': 'entropy', 'max_depth': 8, 'max_features': 'log2', 'n_estimators': 200}
Accuracy score:  0.956140350877193
Precision:  0.935064935064935
Specificity:  0.8809523809523809


In [13]:
import numpy as np

# Define the parameter grid. Despite the name `param_dist`, these are explicit
# value lists rather than sampling distributions:
#   n_estimators  -> np.arange(50, 250, 50)  = 4 values (50, 100, 150, 200)
#   learning_rate -> np.logspace(-4, 0, 5)   = 5 values (1e-4 ... 1)
param_dist = {'n_estimators': np.arange(50, 250, 50), 'learning_rate': np.logspace(-4, 0, 5)}

# Perform an exhaustive grid search over all 4 x 5 = 20 combinations using
# 5-fold cross validation.
# NOTE: this is GridSearchCV, not RandomizedSearchCV -- every combination is
# evaluated. The variable name `random_search` is kept unchanged in case other
# cells refer to it.
random_search = GridSearchCV(GradientBoostingClassifier(random_state=42), param_dist, cv=5)
random_search.fit(X_train, y_train)

# Print the best hyperparameters
print("Best Hyperparameters:", random_search.best_params_)

# Evaluate the selected model on the held-out test set
y_pred = random_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy score: ", accuracy)
precision = precision_score(y_test, y_pred)
print("Precision: ", precision)

# Flatten the confusion matrix into its four cells;
# specificity is the true-negative rate tn / (tn + fp)
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
specificity = tn / (tn+fp)
print("Specificity: ",specificity)

Best Hyperparameters: {'learning_rate': 0.1, 'n_estimators': 200}
Accuracy score:  0.9649122807017544
Precision:  0.9473684210526315
Specificity:  0.9047619047619048
