 Dataset Acquisition

In [1]:
from sklearn.datasets import load_breast_cancer
import pandas as pd

data = load_breast_cancer()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target
df.to_csv('breast_cancer.csv', index=False)


 Data Preparation

In [3]:
# Split into features and target
X = df.drop('target', axis=1)
y = df['target']


In [5]:

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

Feature Selection


In [6]:
from sklearn.feature_selection import SelectKBest, f_classif

# Select top k features
k = 10  # Number of top features to select
selector = SelectKBest(score_func=f_classif, k=k)
X_selected = selector.fit_transform(X_scaled, y)

# Get selected feature names
selected_features = X.columns[selector.get_support()]
print("Selected features:", selected_features)


Selected features: Index(['mean radius', 'mean perimeter', 'mean area', 'mean concavity',
       'mean concave points', 'worst radius', 'worst perimeter', 'worst area',
       'worst concavity', 'worst concave points'],
      dtype='object')


Grid Search CV for Model Tuning


In [7]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV, train_test_split

# Define parameter grid
param_grid = {
    'hidden_layer_sizes': [(50, 50, 50), (50, 100, 50), (100,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant', 'adaptive'],
}

# Setup Grid Search
grid_search = GridSearchCV(MLPClassifier(max_iter=100), param_grid, n_jobs=-1, cv=3)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

# Perform Grid Search
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_
print("Best parameters found:", best_params)


Best parameters found: {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (50, 100, 50), 'learning_rate': 'adaptive', 'solver': 'adam'}




 Implementing an Artificial Neural Network (ANN) Model

In [8]:
# Train model with best parameters
model = MLPClassifier(**best_params, max_iter=1000)
model.fit(X_train, y_train)

# Predict and evaluate
from sklearn.metrics import accuracy_score
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy}")


Model Accuracy: 0.9649122807017544


In [9]:
import joblib

# Train model with best parameters
model = MLPClassifier(**best_params, max_iter=1000)
model.fit(X_train, y_train)

# Save the model to a file
joblib.dump(model, 'breast_cancer_model.pkl')


['breast_cancer_model.pkl']