In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer  # Step 1: Data
from sklearn.model_selection import train_test_split, GridSearchCV  # Step 3 & 5: Train-Test Split & Hyperparameter Optimization
from sklearn.preprocessing import StandardScaler  # Step 2: Preprocessing
from sklearn.neural_network import MLPClassifier  # Step 4: Choose Network Architecture
from sklearn.metrics import classification_report, confusion_matrix  # Step 7 & 8: Validate Assumptions & Evaluate

In [2]:
# Step 1: Data
# ===============================
data = load_breast_cancer()
X, y = data.data, data.target

In [3]:
# Step 2: Preprocessing
# ===============================
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [4]:
# Step 3: Train-Test Split
# ===============================
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [5]:
# Step 4: Choose Network Architecture
# ===============================
# Initialize an MLPClassifier. Here, we start with a default architecture.
mlp = MLPClassifier(max_iter=1000, random_state=42)

In [6]:
# Step 5: Optimize Hyperparameters
# ===============================
# Define a grid of hyperparameters to search over
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (50, 30)],  # Different network architectures
    'activation': ['relu', 'tanh'],                   # Activation functions
    'alpha': [0.0001, 0.001],                         # L2 penalty (regularization term) parameter
    'learning_rate_init': [0.001, 0.01]               # Initial learning rate
}

grid_search = GridSearchCV(mlp, param_grid, cv=3, n_jobs=-1, scoring='accuracy')
grid_search.fit(X_train, y_train)
best_mlp = grid_search.best_estimator_
print("Best Hyperparameters:", grid_search.best_params_)

Best Hyperparameters: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (100,), 'learning_rate_init': 0.001}


In [7]:
# Step 6: Compute Predictions
# ===============================
y_pred = best_mlp.predict(X_test)

In [8]:
# Step 7: Validate Assumptions
# ===============================
# For classification, we validate by checking the confusion matrix and classification report.
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Confusion Matrix:
[[41  2]
 [ 1 70]]

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.95      0.96        43
           1       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114



In [9]:
# Step 8: Evaluate
# ===============================
accuracy = best_mlp.score(X_test, y_test)
print("Test Accuracy: {:.2f}%".format(accuracy * 100))

Test Accuracy: 97.37%


In [10]:
# Step 9: Predict New Data
# ===============================
# Assume new_data is a new set of observations.
# For demonstration, we use the first 5 samples from the test set.
new_data = X_test[:5]
new_predictions = best_mlp.predict(new_data)
print("Predictions on New Data:", new_predictions)

Predictions on New Data: [1 0 0 1 1]
