In [1]:
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()

In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load data
data = load_breast_cancer()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target

# Feature Scaling
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df.drop('target', axis=1))
df_scaled = pd.DataFrame(df_scaled, columns=data.feature_names)

# Save the processed data
df_scaled['target'] = df['target']
df_scaled.to_csv('breast_cancer_processed.csv', index=False)

In [3]:
import pandas as pd
from sklearn.feature_selection import SelectKBest, f_classif

# Load the processed dataset
df = pd.read_csv('breast_cancer_processed.csv')

# Feature Selection
selector = SelectKBest(score_func=f_classif, k=10)
X_new = selector.fit_transform(df.drop('target', axis=1), df['target'])

# Save the selected features
selected_features = pd.DataFrame(X_new, columns=[df.columns[i] for i in selector.get_support(indices=True)])
selected_features['target'] = df['target']
selected_features.to_csv('breast_cancer_selected_features.csv', index=False)

In [4]:
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

# Load the dataset with selected features
df = pd.read_csv('breast_cancer_selected_features.csv')

# Define the model
model = MLPClassifier()

# Define the parameter grid
param_grid = {
    'hidden_layer_sizes': [(50, 50, 50), (50, 100, 50)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'learning_rate': ['constant', 'adaptive'],
}

# Set up Grid Search Cross-Validation
grid_search = GridSearchCV(model, param_grid, n_jobs=-1, cv=5)
grid_search.fit(df.drop('target', axis=1), df['target'])

# Save the best model
import joblib
joblib.dump(grid_search.best_estimator_, 'best_ann_model.pkl')

# Output the best parameters
print("Best parameters found: ", grid_search.best_params_)



Best parameters found:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50, 50), 'learning_rate': 'constant', 'solver': 'adam'}




In [5]:
import pandas as pd
import joblib
from sklearn.metrics import classification_report

# Load the dataset with selected features
df = pd.read_csv('breast_cancer_selected_features.csv')

# Load the best model
best_model = joblib.load('best_ann_model.pkl')

# Train the model (optional, as the model is already trained via GridSearchCV)
best_model.fit(df.drop('target', axis=1), df['target'])

# Evaluate the model
predictions = best_model.predict(df.drop('target', axis=1))
print(classification_report(df['target'], predictions))

# Save the evaluation results
with open('model_evaluation.txt', 'w') as f:
    f.write(classification_report(df['target'], predictions))

              precision    recall  f1-score   support

           0       0.98      0.94      0.96       212
           1       0.97      0.99      0.98       357

    accuracy                           0.97       569
   macro avg       0.97      0.96      0.97       569
weighted avg       0.97      0.97      0.97       569



