In [16]:
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split, cross_val_score, RandomizedSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix

import warnings
warnings.filterwarnings('ignore')



cancer = datasets.load_breast_cancer()
X, y = cancer.data, cancer.target

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# LogisticRegression and hyperparameters for the random search
model = LogisticRegression()
param_dist = {'C': np.logspace(-3, 3, 7),
              'penalty': ['l1', 'l2']}

#cross-validation
cv = 5
search = RandomizedSearchCV(model, param_distributions=param_dist, cv=cv, n_iter=10, random_state=42)
search.fit(X_train, y_train)
print("Best hyperparameters:", search.best_params_)
best_model = search.best_estimator_
y_pred = best_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
sensitivity = cm[1,1] / (cm[1,0] + cm[1,1])
specificity = cm[0,0] / (cm[0,0] + cm[0,1])

print("Accuracy:", accuracy)
print("Sensitivity:", sensitivity)
print("Specificity:", specificity)


Best hyperparameters: {'penalty': 'l2', 'C': 100.0}
Accuracy: 0.9649122807017544
Sensitivity: 0.9859154929577465
Specificity: 0.9302325581395349


In [22]:
# RandomForestClassifier and hyperparameters for the grid search
rf_model = RandomForestClassifier()
rf_param_grid = {'n_estimators': [50, 100, 200],
                 'max_depth': [3, 5, 7],
                 'min_samples_split': [2, 4, 6]}

# GridSearchCV for random forest
rf_search = GridSearchCV(rf_model, param_grid=rf_param_grid, cv=5)
rf_search.fit(X_train, y_train)
print("Best hyperparameters for Random Forest:", rf_search.best_params_)
rf_best_model = rf_search.best_estimator_
y_rf_pred = rf_best_model.predict(X_test)

# Evaluate random forest model
y_rf_pred = rf_best_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, y_rf_pred)
rf_cm = confusion_matrix(y_test, y_rf_pred)
rf_sensitivity = rf_cm[1,1] / (rf_cm[1,0] + rf_cm[1,1])
rf_specificity = rf_cm[0,0] / (rf_cm[0,0] + rf_cm[0,1])

print("Random Forest Model Evaluation Metrics:")
print("Accuracy:", rf_accuracy)
print("Sensitivity:", rf_sensitivity)
print("Specificity:", rf_specificity)


Best hyperparameters for Random Forest: {'max_depth': 5, 'min_samples_split': 2, 'n_estimators': 50}
Random Forest Model Evaluation Metrics:
Accuracy: 0.956140350877193
Sensitivity: 0.971830985915493
Specificity: 0.9302325581395349


In [26]:
#Naive Bayes and hyperparameters for the grid search
nb_model = GaussianNB()
nb_param_grid = {'var_smoothing': np.logspace(0, -9, num=100)}

#GridSearchCV for naive bayes
nb_search = GridSearchCV(nb_model, param_grid=nb_param_grid, cv=5)
nb_search.fit(X_train, y_train)
print("Best hyperparameters for Naive Bayes:", nb_search.best_params_)
nb_best_model = nb_search.best_estimator_
y_nb_pred = nb_best_model.predict(X_test)

#Evaluate naive Bayes model
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
sensitivity = cm[1,1] / (cm[1,0] + cm[1,1])
specificity = cm[0,0] / (cm[0,0] + cm[0,1])

print("Naive Bayes Model Evaluation Metrics:")
print("Accuracy:", accuracy)
print("Sensitivity:", sensitivity)
print("Specificity:", specificity)

Best hyperparameters for Naive Bayes: {'var_smoothing': 2.310129700083158e-09}
Naive Bayes Model Evaluation Metrics:
Accuracy: 0.9649122807017544
Sensitivity: 0.9859154929577465
Specificity: 0.9302325581395349
