In [1]:
!pip install explainableai
import os
os.environ['GOOGLE_API_KEY'] = 'API_KEY'



In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score
import numpy as np

# Breast-cancer dataset: keep the loaded object around and pull out the
# feature matrix and the target vector.
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 20% of the samples as a test set; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Feature scaling: the scaler is the first pipeline step, so it is fitted
# together with the classifier every time the pipeline is fitted.
scaler = StandardScaler()

# Logistic Regression with the 'saga' solver and hyperparameter tuning.
# Settings shared by every candidate in the search.
_logistic_regression_shared_params = {
    'logistic_regression__C': [0.01, 0.1, 1, 10],
    'logistic_regression__solver': ['saga'],
    'logistic_regression__max_iter': [5000],
}
# l1_ratio only has an effect with the elasticnet penalty. Crossing it with
# penalty='l2' would fit every l2 candidate three times with identical results,
# so the grid is split into one sub-grid per penalty (16 candidates, not 24).
logistic_regression_params = [
    {
        **_logistic_regression_shared_params,
        'logistic_regression__penalty': ['l2'],
    },
    {
        **_logistic_regression_shared_params,
        'logistic_regression__penalty': ['elasticnet'],
        'logistic_regression__l1_ratio': [0.1, 0.5, 0.9],
    },
]
logistic_regression_pipeline = Pipeline([
    ('scaler', scaler),
    ('logistic_regression', LogisticRegression())
])

# 5-fold cross-validated search; n_jobs=-1 spreads the fits over all CPU cores.
logistic_regression_gs = GridSearchCV(
    logistic_regression_pipeline,
    logistic_regression_params,
    cv=5,
    n_jobs=-1,
)

# Random Forest with hyperparameter tuning.
# The grid holds 3 * 4 * 3 * 3 = 108 candidates; with cv=5 that is 540 forests
# of 1000-3000 trees each.
random_forest_params = {
    'n_estimators': [1000, 2000, 3000],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
random_forest = RandomForestClassifier(random_state=42)

# Run the candidate fits in parallel on all CPU cores. A sequential search of
# this size is very slow; the recorded run of this notebook ended in a
# KeyboardInterrupt. The search space itself is unchanged.
random_forest_gs = GridSearchCV(
    random_forest,
    random_forest_params,
    cv=5,
    n_jobs=-1,
)

# XGBoost with hyperparameter tuning.
# The grid holds 3 * 3 * 3 * 3 = 81 candidates, each evaluated with 5-fold CV.
xgboost_params = {
    'n_estimators': [1000, 2000, 3000],
    'learning_rate': [0.01, 0.1, 0.3],
    'max_depth': [3, 6, 9],
    'subsample': [0.7, 0.8, 1.0]
}
# `use_label_encoder` is deprecated in XGBoost and is no longer a recognised
# parameter in current releases, where passing it only produces warnings on
# fit. The targets here are already integer class labels, so it is dropped.
xgboost = XGBClassifier(random_state=42)
xgboost_gs = GridSearchCV(xgboost, xgboost_params, cv=5)

# Neural Network (MLP) with hyperparameter tuning.
# The 'learning_rate' schedule is only used by the 'sgd' solver. With
# solver='adam' every schedule value gives the same fit, so it is left out of
# the grid instead of doubling the number of candidates.
mlp_params = {
    'mlp__hidden_layer_sizes': [(100, 50), (128, 64, 32)],
    'mlp__activation': ['relu', 'tanh'],
    'mlp__solver': ['adam'],
    'mlp__alpha': [0.0001, 0.001],
    'mlp__max_iter': [3000]
}
mlp = MLPClassifier(random_state=42)

# MLPs are sensitive to feature scale, so standardise the inputs in a pipeline
# (a fresh scaler, fitted together with the network on each fit).
mlp_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('mlp', mlp)
])

# 5-fold cross-validated search; n_jobs=-1 spreads the fits over all CPU cores.
# best_estimator_ is now the fitted pipeline and still exposes predict/score.
mlp_gs = GridSearchCV(mlp_pipeline, mlp_params, cv=5, n_jobs=-1)

# Run every grid search on the training split, in the original order.
for grid_search in (logistic_regression_gs, random_forest_gs, xgboost_gs, mlp_gs):
    grid_search.fit(X_train, y_train)

# Best estimator found by each search.
logistic_regression_best, random_forest_best, xgboost_best, mlp_best = (
    logistic_regression_gs.best_estimator_,
    random_forest_gs.best_estimator_,
    xgboost_gs.best_estimator_,
    mlp_gs.best_estimator_,
)

# Test-set predictions of each tuned model.
y_pred_log_reg, y_pred_rf, y_pred_xgb, y_pred_mlp = (
    best_model.predict(X_test)
    for best_model in (
        logistic_regression_best,
        random_forest_best,
        xgboost_best,
        mlp_best,
    )
)

# Fraction of test samples classified correctly by each model.
log_reg_acc, rf_acc, xgb_acc, mlp_acc = (
    accuracy_score(y_test, predictions)
    for predictions in (y_pred_log_reg, y_pred_rf, y_pred_xgb, y_pred_mlp)
)

# Report the selected hyperparameters and the test accuracy per model.
print(f"Logistic Regression (saga) Best Parameters: {logistic_regression_gs.best_params_}")
print(f"Logistic Regression (saga) Test Accuracy: {log_reg_acc * 100:.2f}%")

print(f"Random Forest Best Parameters: {random_forest_gs.best_params_}")
print(f"Random Forest Test Accuracy: {rf_acc * 100:.2f}%")

print(f"XGBoost Best Parameters: {xgboost_gs.best_params_}")
print(f"XGBoost Test Accuracy: {xgb_acc * 100:.2f}%")

print(f"Neural Network (MLP) Best Parameters: {mlp_gs.best_params_}")
print(f"Neural Network (MLP) Test Accuracy: {mlp_acc * 100:.2f}%")




KeyboardInterrupt: 

In [54]:
# Fit each model on the training split and report its accuracy on the test split.
# NOTE(review): `models` is not defined in any cell visible in this notebook;
# presumably a name -> estimator mapping created in a cell that was since
# removed. Define it before this cell so Restart & Run All works.
for model_name in models:
    model = models[model_name]
    model.fit(X_train, y_train)
    # score() is evaluated on the held-out test data
    accuracy = model.score(X_test, y_test)
    print(f"{model_name} Test Accuracy: {accuracy * 100:.2f}%")

Random Forest Test Accuracy: 96.49%
Logistic Regression (saga) Test Accuracy: 97.37%
XGBoost Test Accuracy: 95.61%
Neural Network (MLP) Test Accuracy: 94.74%
