In [1]:
pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.6-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.6


In [5]:
from ucimlrepo import fetch_ucirepo
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve, validation_curve
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score


In [6]:
def grab_breast_cancer_dataset():
  # fetch dataset
  breast_cancer_wisconsin_diagnostic = fetch_ucirepo(id=17)
  # data (as pandas dataframes)
  X = breast_cancer_wisconsin_diagnostic.data.features
  y = breast_cancer_wisconsin_diagnostic.data.targets

  return X,y

def preprocess_data(X, y, test_size=0.2, random_state=None):

  #Numerize y data
  y['Diagnosis'] = y['Diagnosis'].map({'M': 1, 'B': 0})

  # Split the data into training and testing sets
  x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
  y_train = y_train.values.reshape(-1)
  y_test = y_test.values.reshape(-1)


  return x_train, x_test, y_train, y_test

def evaluate_classifier(classifier, x_train, y_train, x_test, y_test):
    # Make predictions on the training data
    y_pred = classifier.predict(x_train)

    # Calculate evaluation metrics
    accuracy = accuracy_score(y_train, y_pred)
    precision = precision_score(y_train, y_pred)
    recall = recall_score(y_train, y_pred)
    f1 = f1_score(y_train, y_pred)

    #Print the metrics: Training
    print("Training Results")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")


    # Make predictions on the testing data
    y_pred = classifier.predict(x_test)

    # Calculate evaluation metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Print the metrics
    print("Validation Test Results")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")

    return accuracy, precision, recall, f1


In [4]:
#Functions for Nueral Network

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

def grid_search_neural_network(X_train, y_train, cv=5):

    param_grid = {
        'hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 50)],  # Number of neurons in each hidden layer
        'activation': ['relu', 'tanh'],                                 # Activation function
        'solver': ['adam', 'sgd'],                                      # Solver for weight optimization
        'learning_rate': ['constant', 'adaptive'],                      # Learning rate schedule
        'alpha': [0.0001, 0.001, 0.01],                                 # L2 penalty (regularization term)
    }

    param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam'],
    'learning_rate': ['constant'],
    'alpha': [0.0001, 0.001, 0.01],
    }

    nn_classifier = MLPClassifier(max_iter=1000)

    grid_search = GridSearchCV(nn_classifier, param_grid, cv=cv, scoring='accuracy', n_jobs=-1)
    grid_search.fit(X_train, y_train)

    best_params = grid_search.best_params_
    best_estimator = grid_search.best_estimator_

    return best_params, best_estimator


In [8]:
# This is the best nueral network: best_nn_classifier
#hyperparameters: Best Hyperparameters for Nueral Network: {'activation': 'relu', 'alpha': 0.01, 'hidden_layer_sizes': (50,), 'learning_rate': 'constant', 'solver': 'adam'}

#Baseline
import time


X,y = grab_breast_cancer_dataset()
x_train, x_test, y_train, y_test = preprocess_data(X, y, test_size=0.2, random_state=42)
# Measure the start time
start_time = time.time()

nn_classifier = MLPClassifier(
    activation='relu',
    alpha=0.01,
    hidden_layer_sizes=(50,),
    learning_rate='constant',
    solver='adam'
)

nn_classifier.fit(x_train, y_train)

# Measure the end time
end_time = time.time()

evaluate_classifier(nn_classifier, x_train, y_train, x_test, y_test)

# Calculate the execution time
execution_time = end_time - start_time

print(f"Execution time: {execution_time:.2f} seconds")



Training Results
Accuracy: 0.9319
F1 Score: 0.9063
Validation Test Results
Accuracy: 0.9649
F1 Score: 0.9524
Execution time: 1.25 seconds




In [11]:
#Apply PCA to Reduce Dataset then run Nueral Network Again

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

print("PCA")

X,y = grab_breast_cancer_dataset()

scaler = StandardScaler()
X = scaler.fit_transform(X)

pca = PCA(n_components=10)
X = pca.fit_transform(X)

x_train, x_test, y_train, y_test = preprocess_data(X, y, test_size=0.2, random_state=42)

# Measure the start time
start_time = time.time()

nn_classifier = MLPClassifier(
    activation='relu',
    alpha=0.01,
    hidden_layer_sizes=(50,),
    learning_rate='constant',
    solver='adam'
)

nn_classifier.fit(x_train, y_train)

# Measure the end time
end_time = time.time()

evaluate_classifier(nn_classifier, x_train, y_train, x_test, y_test)

# Calculate the execution time
execution_time = end_time - start_time

print(f"Execution time: {execution_time:.2f} seconds")




PCA
Training Results
Accuracy: 0.9912
F1 Score: 0.9880
Validation Test Results
Accuracy: 0.9737
F1 Score: 0.9655
Execution time: 0.93 seconds




In [17]:
#Apply ICA to Reduce Dataset then run Nueral Network Again

from sklearn.decomposition import PCA
from sklearn.decomposition import FastICA

print("ICA")


X,y = grab_breast_cancer_dataset()

scaler = StandardScaler()
X = scaler.fit_transform(X)

ica = FastICA(n_components=30, random_state=42)  # Specify the number of components to keep
X = ica.fit_transform(X)


x_train, x_test, y_train, y_test = preprocess_data(X, y, test_size=0.2, random_state=42)

# Measure the start time
start_time = time.time()

nn_classifier = MLPClassifier(
    activation='relu',
    alpha=0.01,
    hidden_layer_sizes=(50,),
    learning_rate='constant',
    solver='adam'
)

nn_classifier.fit(x_train, y_train)

# Measure the end time
end_time = time.time()

evaluate_classifier(nn_classifier, x_train, y_train, x_test, y_test)

# Calculate the execution time
execution_time = end_time - start_time

print(f"Execution time: {execution_time:.2f} seconds")




ICA




Training Results
Accuracy: 0.9714
F1 Score: 0.9605
Validation Test Results
Accuracy: 0.9561
F1 Score: 0.9398
Execution time: 1.22 seconds




In [16]:
#Apply Random Projection to Reduce Dataset then run Nueral Network Again

from sklearn.decomposition import PCA
from sklearn.decomposition import FastICA
from sklearn.random_projection import GaussianRandomProjection


print("Random Projection")


X,y = grab_breast_cancer_dataset()

scaler = StandardScaler()
X = scaler.fit_transform(X)

rp = GaussianRandomProjection(n_components=30)
X = rp.fit_transform(X)


x_train, x_test, y_train, y_test = preprocess_data(X, y, test_size=0.2, random_state=42)

# Measure the start time
start_time = time.time()

nn_classifier = MLPClassifier(
    activation='relu',
    alpha=0.01,
    hidden_layer_sizes=(50,),
    learning_rate='constant',
    solver='adam'
)

nn_classifier.fit(x_train, y_train)

# Measure the end time
end_time = time.time()

evaluate_classifier(nn_classifier, x_train, y_train, x_test, y_test)

# Calculate the execution time
execution_time = end_time - start_time

print(f"Execution time: {execution_time:.2f} seconds")




Random Projection
Training Results
Accuracy: 0.9736
F1 Score: 0.9641
Validation Test Results
Accuracy: 0.9474
F1 Score: 0.9302
Execution time: 1.44 seconds




In [21]:
#Apply Isomap to Reduce Dataset then run Nueral Network Again

from sklearn.decomposition import PCA
from sklearn.decomposition import FastICA
from sklearn.random_projection import GaussianRandomProjection
from sklearn.manifold import Isomap


print("Isomap")


X,y = grab_breast_cancer_dataset()

scaler = StandardScaler()
X = scaler.fit_transform(X)

isomap = Isomap(n_components=2)
X = isomap.fit_transform(X)


x_train, x_test, y_train, y_test = preprocess_data(X, y, test_size=0.2, random_state=42)

# Measure the start time
start_time = time.time()

nn_classifier = MLPClassifier(
    activation='relu',
    alpha=0.01,
    hidden_layer_sizes=(50,),
    learning_rate='constant',
    solver='adam'
)

nn_classifier.fit(x_train, y_train)

# Measure the end time
end_time = time.time()

evaluate_classifier(nn_classifier, x_train, y_train, x_test, y_test)

# Calculate the execution time
execution_time = end_time - start_time

print(f"Execution time: {execution_time:.2f} seconds")




Isomap
Training Results
Accuracy: 0.9670
F1 Score: 0.9552
Validation Test Results
Accuracy: 0.9649
F1 Score: 0.9512
Execution time: 0.37 seconds


Step 5

In [26]:
from sklearn.cluster import KMeans
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
import numpy as np
import time

# Fetch the dataset
X,y = grab_breast_cancer_dataset()

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply Isomap to reduce the dataset
isomap = Isomap(n_components=2)
X_reduced = isomap.fit_transform(X_scaled)

# Cluster the reduced dataset with KMeans
kmeans = KMeans(n_clusters=2, random_state=42)
cluster_labels = kmeans.fit_predict(X_reduced)

# Concatenate cluster labels with the reduced dataset
X_with_clusters = np.column_stack((X_reduced, cluster_labels))

# Split the data into train and test sets
x_train, x_test, y_train, y_test = train_test_split(X_with_clusters, y, test_size=0.2, random_state=42)

# Train the neural network classifier
start_time = time.time()

nn_classifier = MLPClassifier(
    activation='relu',
    alpha=0.01,
    hidden_layer_sizes=(50,),
    learning_rate='constant',
    solver='adam'
)

nn_classifier.fit(x_train, y_train)

end_time = time.time()

# Evaluate the classifier
train_accuracy = nn_classifier.score(x_train, y_train)
test_accuracy = nn_classifier.score(x_test, y_test)
execution_time = end_time - start_time

print(f"Train Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Execution Time: {execution_time:.2f} seconds")


  y = column_or_1d(y, warn=True)


Train Accuracy: 0.9670
Test Accuracy: 0.9649
Execution Time: 1.22 seconds


In [28]:
from sklearn.cluster import KMeans
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
import numpy as np
import time
from sklearn.mixture import GaussianMixture


# Fetch the dataset
X,y = grab_breast_cancer_dataset()

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply Isomap to reduce the dataset
isomap = Isomap(n_components=2)
X_reduced = isomap.fit_transform(X_scaled)

gmm = GaussianMixture(n_components=2, random_state=10)
cluster_labels = gmm.fit_predict(X_reduced)

# Concatenate cluster labels with the reduced dataset
X_with_clusters = np.column_stack((X_reduced, cluster_labels))

# Split the data into train and test sets
x_train, x_test, y_train, y_test = train_test_split(X_with_clusters, y, test_size=0.2, random_state=42)

# Train the neural network classifier
start_time = time.time()

nn_classifier = MLPClassifier(
    activation='relu',
    alpha=0.01,
    hidden_layer_sizes=(50,),
    learning_rate='constant',
    solver='adam'
)

nn_classifier.fit(x_train, y_train)

end_time = time.time()

# Evaluate the classifier
train_accuracy = nn_classifier.score(x_train, y_train)
test_accuracy = nn_classifier.score(x_test, y_test)
execution_time = end_time - start_time

print(f"Train Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Execution Time: {execution_time:.2f} seconds")


  y = column_or_1d(y, warn=True)


Train Accuracy: 0.9714
Test Accuracy: 0.9649
Execution Time: 0.88 seconds
