In [123]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [125]:
# Fixed seed value intended for the notebook's stochastic steps
RANDOM_STATE = 42
# Load scikit-learn's bundled breast-cancer dataset; the returned object exposes
# .data, .feature_names and .target, which the next cell uses
cancer = datasets.load_breast_cancer()

In [127]:
# Build the feature table and append the class label in one expression
# (target encoding: 0: Malignant, 1: Benign)
df = pd.DataFrame(cancer.data, columns=cancer.feature_names).assign(
    target=cancer.target
)

# Report how many missing entries each column contains
missing_per_column = df.isna().sum()
print(missing_per_column)

mean radius                0
mean texture               0
mean perimeter             0
mean area                  0
mean smoothness            0
mean compactness           0
mean concavity             0
mean concave points        0
mean symmetry              0
mean fractal dimension     0
radius error               0
texture error              0
perimeter error            0
area error                 0
smoothness error           0
compactness error          0
concavity error            0
concave points error       0
symmetry error             0
fractal dimension error    0
worst radius               0
worst texture              0
worst perimeter            0
worst area                 0
worst smoothness           0
worst compactness          0
worst concavity            0
worst concave points       0
worst symmetry             0
worst fractal dimension    0
target                     0
dtype: int64


In [129]:
# Separate the predictor columns from the label column
X = df.drop(columns=['target'])
y = df['target']

# Hold out 20% of the rows for final evaluation. The seed comes from the
# RANDOM_STATE constant in the configuration cell instead of a repeated literal,
# so the whole notebook can be re-seeded from one place.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Standardize the features: the scaler is fitted on the training rows only and
# the same fitted transform is then applied to the test rows.
# NOTE: fit_transform/transform return arrays by default, so from this point on
# X_train and X_test are no longer DataFrames (column names are dropped).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print("Data Preprocessing Completed!")


Data Preprocessing Completed!


In [131]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

# Base gradient-boosting model, seeded from the shared RANDOM_STATE constant
gbc = GradientBoostingClassifier(random_state=RANDOM_STATE)

# Candidate hyperparameters: 3 x 3 x 3 = 27 combinations
param_grid_gbc = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 10]
}

# Exhaustive 5-fold cross-validated search scored on accuracy, using all cores
grid_search_gbc = GridSearchCV(gbc, param_grid_gbc, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_gbc.fit(X_train, y_train)

# With GridSearchCV's default refit=True the winning configuration has already
# been retrained on the full training set, so best_estimator_ is ready to
# predict; a second .fit() call would only repeat that work.
best_gbc = grid_search_gbc.best_estimator_

print("Best Gradient Boosting Parameters:", grid_search_gbc.best_params_)


Best Gradient Boosting Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}


In [132]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
# Base random-forest model, seeded from the shared RANDOM_STATE constant
rf = RandomForestClassifier(random_state=RANDOM_STATE)

# Candidate hyperparameters: 3 x 3 x 3 = 27 combinations
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10]
}

# Exhaustive 5-fold cross-validated search scored on accuracy. n_jobs=-1 runs
# the fits in parallel, matching the gradient-boosting search; it affects only
# wall-clock time, not the selected parameters.
grid_search = GridSearchCV(rf, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# With the default refit=True the best configuration has already been retrained
# on the full training set, so no additional .fit() call is needed.
best_rf = grid_search.best_estimator_

print("Best Random Forest Model:", grid_search.best_params_)

Best Random Forest Model: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}


In [133]:
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV

# RANDOM_STATE is defined once in the configuration cell near the top of the
# notebook; it is reused here rather than redefined.

# Base SVM model. probability=True enables predict_proba on the fitted model
# (not used in the cells shown here -- presumably needed later; confirm).
svm = SVC(probability=True, random_state=RANDOM_STATE)

# Search space: 4 x 3 x 2 = 24 combinations, of which n_iter=10 are sampled
param_dist = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto']
}

# Randomized search. random_state pins WHICH 10 candidates are drawn; without
# it every run samples a different subset and may report different "best"
# parameters. cv and scoring are spelled out to match the other searches
# (they equal the library defaults for a classifier), and n_jobs=-1 only
# parallelizes the fits.
random_search = RandomizedSearchCV(
    svm,
    param_distributions=param_dist,
    n_iter=10,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    random_state=RANDOM_STATE,
)
random_search.fit(X_train, y_train)

# With the default refit=True the best candidate has already been retrained on
# the full training set, so no additional .fit() call is needed.
best_svm = random_search.best_estimator_

print("Best SVM Model:", random_search.best_params_)


Best SVM Model: {'kernel': 'linear', 'gamma': 'scale', 'C': 1}


In [136]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Print a four-metric classification report for one model
def evaluate_model(y_true, y_pred, model_name):
    """Print accuracy, precision, recall and F1 for a set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the model, aligned with ``y_true``.
        model_name: Human-readable name shown in the report header.

    Returns:
        None. The report is written to stdout, each score with four decimals.
    """
    # (label, scoring function) pairs in the order they are reported
    metric_table = (
        ("Accuracy", accuracy_score),
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1-Score", f1_score),
    )

    print(f"\n🔹 {model_name} Performance Metrics:")
    for label, scorer in metric_table:
        score = scorer(y_true, y_pred)
        print(f"{label}: {score:.4f}")

# Score each tuned model on the held-out test set, in a fixed reporting order
tuned_models = (
    (best_gbc, "Gradient Boosting Classifier"),
    (best_rf, "Random Forest Classifier"),
    (best_svm, "Support Vector Machine"),
)
for fitted_model, display_name in tuned_models:
    evaluate_model(y_test, fitted_model.predict(X_test), display_name)



🔹 Gradient Boosting Classifier Performance Metrics:
Accuracy: 0.9561
Precision: 0.9583
Recall: 0.9718
F1-Score: 0.9650

🔹 Random Forest Classifier Performance Metrics:
Accuracy: 0.9649
Precision: 0.9589
Recall: 0.9859
F1-Score: 0.9722

🔹 Support Vector Machine Performance Metrics:
Accuracy: 0.9561
Precision: 0.9714
Recall: 0.9577
F1-Score: 0.9645
