In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
import itertools
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier


In [2]:
def initialize_weights(nodes):
    """Initialize weights with random values in [-1, 1] (including bias)"""
    layers, weights = len(nodes), []
     
    
    for i in range(1, layers):
        w = [[np.random.uniform(-1, 1) for k in range(nodes[i-1] + 1)]
              for j in range(nodes[i])]
        weights.append(np.matrix(w))
    
    return weights

In [3]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return np.multiply(x, 1 - x)

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)






In [4]:
def forward_propagation(x, weights, layers, activation_function):
    activations, layer_input = [x], x
    for j in range(layers):
        if activation_function == 'sigmoid':
            activation_result = sigmoid(np.dot(layer_input, weights[j].T))
        elif activation_function == 'relu':
            activation_result = relu(np.dot(layer_input, weights[j].T))
       
        else:
            raise ValueError(f"Unsupported activation function: {activation_function}")

        activations.append(activation_result)
        layer_input = np.append(1, activation_result)  # Augment with bias

    return activations



In [5]:
def back_propagation(y, activations, weights, layers, lr,function_derivate):
    outputFinal = activations[-1]
    error = np.matrix(y - outputFinal) # Error at output
    
    for j in range(layers, 0, -1):
        currActivation = activations[j]
        
        if(j > 1):
            # Augment previous activation
            prevActivation = np.append(1, activations[j-1])
        else:
            # First hidden layer, prevActivation is input (without bias)
            prevActivation = activations[0]
        
        delta = np.multiply(error, function_derivate(currActivation))
        weights[j-1] += lr * np.multiply(delta.T, prevActivation)

        w = np.delete(weights[j-1], [0], axis=1) # Remove bias from weights
        error = np.dot(delta, w) # Calculate error for current layer
    
    return weights

In [6]:
def train(X, Y, lr, weights,activation_function):
    layers = len(weights)
    for i in range(len(X)):
        x, y = X[i], Y[i]
        x = np.matrix(np.append(1, x)) # Augment feature vector
        activations = forward_propagation(x, weights, layers,activation_function)
        if activation_function == 'sigmoid':

            weights = back_propagation(y, activations, weights, layers, lr,sigmoid_derivative)
       
        else:
            weights = back_propagation(y, activations, weights, layers, lr,relu_derivative)
       



            

    return weights


In [7]:
def predict(item, weights, activation_function):
    layers = len(weights)
    item = np.append(np.ones(1), item)  # Augment feature vector
    activations = forward_propagation(item, weights,layers, activation_function)
    output_final = activations[-1].A1
    index = np.argmax(output_final)

    y = np.zeros(len(output_final))
    y[index] = 1

    return y



In [8]:
def accuracy(X, Y, weights, activation_function):
    correct = 0

    for i in range(len(X)):
        x, y = X[i], Y[i]
        guess = predict(x, weights, activation_function)

        if np.array_equal(y, guess):
            correct += 1

    return correct / len(X)

In [11]:

import itertools

def grid_search(X_train, Y_train, X_test, Y_test, activation_functions, learning_rates):
    f = len(X_train[0])  # Number of features
    o = len(Y_train[0])  # Number of outputs / classes

    min_layers = 2
    max_layers = 4
    min_nodes = 3
    max_nodes = 5
    scores = {}

    best_accuracy = 0
    best_params = {}

    for activation_function in activation_functions:
        for num_layers in range(min_layers, max_layers+1):
            nodes_range = range(min_nodes, max_nodes + 1)
            layer_combinations = itertools.product(nodes_range, repeat=num_layers - 1)

            for combination in layer_combinations:
                for lr in learning_rates:  # Iterate over learning rates
                    layers = [f] + list(combination) + [o]
                    print(f"Configuration: {layers}, Learning Rate: {lr}")

                    epochs = 100
                    weights = initialize_weights(layers)

                    for _ in range(epochs):
                        weights = train(X_train, Y_train, lr, weights, activation_function)

                    score = accuracy(X_test, Y_test, weights, activation_function)
                    layers_tuple = tuple(layers)
                    scores[layers_tuple + (lr,)] = {'activation_function': activation_function, 'score': score}
                    print(f"Testing Accuracy with {activation_function} and Learning Rate {lr}: {score}")
                    print("--------------------------------------------")

                    # Update best parameters if accuracy is improved
                    if score > best_accuracy:
                        best_accuracy = score
                        best_params = {
                            'activation_function': activation_function,
                            'layers': layers,
                            'learning_rate': lr,
                            'epochs': epochs,
                            'weights': weights
                        }

    print(f"Best Configuration: {best_params['layers']} with {best_params['activation_function']} activation function and Learning Rate {best_params['learning_rate']}")
    return best_params, best_accuracy


In [12]:
from sklearn.model_selection import KFold
import itertools

def grid_search_cv(X, Y, activation_functions, learning_rates, num_folds=5):
    f = len(X[0])  # Number of features
    o = len(Y[0])  # Number of outputs / classes

    min_layers = 2
    max_layers = 4
    min_nodes = 3
    max_nodes = 5
    scores = {}

    best_accuracy = 0
    best_params = {}

    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

    for activation_function in activation_functions:
        for num_layers in range(min_layers, max_layers + 1):
            nodes_range = range(min_nodes, max_nodes + 1)
            layer_combinations = itertools.product(nodes_range, repeat=num_layers - 1)

            for combination in layer_combinations:
                for lr in learning_rates:
                    layers = [f] + list(combination) + [o]
                    print(f"Configuration: {layers}, Learning Rate: {lr}")

                    total_accuracy = 0

                    for train_idx, test_idx in kf.split(X):
                        X_train, X_val = X[train_idx], X[test_idx]
                        Y_train, Y_val = Y[train_idx], Y[test_idx]

                        epochs = 100
                        weights = initialize_weights(layers)

                        for _ in range(epochs):
                            weights = train(X_train, Y_train, lr, weights, activation_function)

                        # Calculate accuracy using a different variable name
                        acc = accuracy(X_val, Y_val, weights, activation_function)
                        total_accuracy += acc

                    avg_accuracy = total_accuracy / num_folds
                    layers_tuple = tuple(layers)
                    scores[layers_tuple + (lr,)] = {'activation_function': activation_function, 'score': avg_accuracy}
                    print(f"Cross-validated Accuracy with {activation_function} and Learning Rate {lr}: {avg_accuracy}")
                    print("--------------------------------------------")

                    # Update best parameters if accuracy is improved
                    if avg_accuracy > best_accuracy:
                        best_accuracy = avg_accuracy
                        best_params = {
                            'activation_function': activation_function,
                            'layers': layers,
                            'learning_rate': lr,
                            'epochs': epochs,
                            'weights': weights
                        }

    print(f"Best Configuration: {best_params['layers']} with {best_params['activation_function']} activation function and Learning Rate {best_params['learning_rate']}")
    return best_params, best_accuracy






In [13]:
def predict_with_params(item, best_params):
    weights = best_params['weights']
    activation_function = best_params['activation_function']

    layers = len(weights)
    item = np.append(np.ones(1), item)  # Augment feature vector
    activations = forward_propagation(item, weights, layers, activation_function)
    output_final = activations[-1].A1
    index = np.argmax(output_final)

    y = np.zeros(len(output_final))
    y[index] = 1

    return y

In [14]:
def run_with_best_params(X_test, best_params):
    predictions = []
    for item in X_test:
        prediction = predict_with_params(item, best_params)
        predictions.append(prediction)

    return predictions

In [15]:
df = pd.read_csv("./data/breast_cancer/breast-cancer-diagnostic.shuf.lrn.csv", skipinitialspace=True)

In [16]:
y = df['class']
y=pd.DataFrame(y)
df = df.drop(['class', "ID"], axis=1)
df = np.array(df)

In [17]:
from sklearn.preprocessing import OneHotEncoder

y['class'] = y['class'].astype(str)
y['class'] = y['class'].map({'False': 0, 'True': 1})
one_hot_encoder = OneHotEncoder(sparse=False)
y = one_hot_encoder.fit_transform(np.array(y).reshape(-1, 1))



In [18]:
from sklearn.model_selection import train_test_split

# Split into train and temp (which includes validation and test)
X_train, X_temp, Y_train, Y_temp = train_test_split(df, y, test_size=0.2, random_state=42)

# Split temp into validation and test
X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=0.5, random_state=42)



In [19]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_val = scaler.transform(X_val)

In [20]:
import time

# Record the start time
start_time = time.time()



activation_functions = ['sigmoid','relu']
learning_rates = [0.1, 0.01, 0.001]

best_params,best_accuracy = grid_search_cv(X_train, Y_train, activation_functions,learning_rates)



end_time = time.time()




Configuration: [30, 3, 2], Learning Rate: 0.1


Cross-validated Accuracy with sigmoid and Learning Rate 0.1: 0.9605797101449276
--------------------------------------------
Configuration: [30, 3, 2], Learning Rate: 0.01
Cross-validated Accuracy with sigmoid and Learning Rate 0.01: 0.9778743961352656
--------------------------------------------
Configuration: [30, 3, 2], Learning Rate: 0.001
Cross-validated Accuracy with sigmoid and Learning Rate 0.001: 0.7769082125603864
--------------------------------------------
Configuration: [30, 4, 2], Learning Rate: 0.1
Cross-validated Accuracy with sigmoid and Learning Rate 0.1: 0.9735265700483092
--------------------------------------------
Configuration: [30, 4, 2], Learning Rate: 0.01
Cross-validated Accuracy with sigmoid and Learning Rate 0.01: 0.9866666666666667
--------------------------------------------
Configuration: [30, 4, 2], Learning Rate: 0.001
Cross-validated Accuracy with sigmoid and Learning Rate 0.001: 0.8856038647342995
--------------------------------------------
Configur

In [22]:
best_params
#Configuration: [30, 3, 2]

({'activation_function': 'sigmoid',
  'layers': [30, 4, 2],
  'learning_rate': 0.01,
  'epochs': 100,
  'weights': [matrix([[ 0.01663168, -0.5892001 ,  0.56665088,  0.14564999,  0.98954898,
            -0.61810228, -0.37224761, -0.58487626,  0.66786147,  0.80976722,
            -0.26983575,  1.10602579,  0.22466728, -0.64663712,  1.13633403,
            -0.54753642,  0.3847689 ,  0.35121155, -0.53427436,  0.28642948,
             0.03244976,  0.33658072,  0.38507615, -0.42231667,  0.555426  ,
             1.0573559 ,  0.2478337 ,  0.22073142,  0.73314785, -0.72139655,
            -0.35369127],
           [-0.04062523, -0.52254543, -1.21055492, -0.77659463, -1.0430254 ,
             0.5868272 ,  0.64013533,  0.43837203,  0.21259594, -1.10494935,
             0.27555332, -1.09858487, -0.6242303 , -0.55217228, -0.00233498,
             0.96629532, -0.211672  , -0.29985052, -0.33805922,  0.25096185,
             1.06988018, -0.78425186,  0.10720898, -0.970665  ,  0.07109118,
            -1

In [23]:
# Calculate the elapsed time in seconds
elapsed_time_seconds = end_time - start_time

# Convert elapsed time to minutes
elapsed_time_minutes = elapsed_time_seconds / 60

# Print the elapsed time
print(f"Grid search CV took {elapsed_time_minutes} minutes")

Grid search CV took 121.48661988178888 minutes


Grid search took 30.024658981959025 minutes (without CV)


In [24]:
weights = best_params[0]['weights']
weights

[matrix([[ 0.01663168, -0.5892001 ,  0.56665088,  0.14564999,  0.98954898,
          -0.61810228, -0.37224761, -0.58487626,  0.66786147,  0.80976722,
          -0.26983575,  1.10602579,  0.22466728, -0.64663712,  1.13633403,
          -0.54753642,  0.3847689 ,  0.35121155, -0.53427436,  0.28642948,
           0.03244976,  0.33658072,  0.38507615, -0.42231667,  0.555426  ,
           1.0573559 ,  0.2478337 ,  0.22073142,  0.73314785, -0.72139655,
          -0.35369127],
         [-0.04062523, -0.52254543, -1.21055492, -0.77659463, -1.0430254 ,
           0.5868272 ,  0.64013533,  0.43837203,  0.21259594, -1.10494935,
           0.27555332, -1.09858487, -0.6242303 , -0.55217228, -0.00233498,
           0.96629532, -0.211672  , -0.29985052, -0.33805922,  0.25096185,
           1.06988018, -0.78425186,  0.10720898, -0.970665  ,  0.07109118,
          -1.31415017, -0.6656186 , -0.01003075, -0.619895  ,  0.17475541,
          -0.74426178],
         [-0.81407554, -1.01479756, -1.14949224,  0.

In [25]:
best_params

({'activation_function': 'sigmoid',
  'layers': [30, 4, 2],
  'learning_rate': 0.01,
  'epochs': 100,
  'weights': [matrix([[ 0.01663168, -0.5892001 ,  0.56665088,  0.14564999,  0.98954898,
            -0.61810228, -0.37224761, -0.58487626,  0.66786147,  0.80976722,
            -0.26983575,  1.10602579,  0.22466728, -0.64663712,  1.13633403,
            -0.54753642,  0.3847689 ,  0.35121155, -0.53427436,  0.28642948,
             0.03244976,  0.33658072,  0.38507615, -0.42231667,  0.555426  ,
             1.0573559 ,  0.2478337 ,  0.22073142,  0.73314785, -0.72139655,
            -0.35369127],
           [-0.04062523, -0.52254543, -1.21055492, -0.77659463, -1.0430254 ,
             0.5868272 ,  0.64013533,  0.43837203,  0.21259594, -1.10494935,
             0.27555332, -1.09858487, -0.6242303 , -0.55217228, -0.00233498,
             0.96629532, -0.211672  , -0.29985052, -0.33805922,  0.25096185,
             1.06988018, -0.78425186,  0.10720898, -0.970665  ,  0.07109118,
            -1

In [26]:
predictions = []
best_params_dict = best_params[0]

for item in X_test:
    prediction = predict_with_params(item, best_params_dict)
    predictions.append(prediction)

In [27]:
# Calculate accuracy
accuracy = accuracy(X_test,Y_test,weights,'sigmoid')
print("Accuracy:", accuracy)

Accuracy: 0.9310344827586207


In [None]:

mlp = MLPClassifier(hidden_layer_sizes=(4, 5), max_iter=500)  # 2 layers with 4 and 5 nodes
mlp.fit(X_train, Y_train)

# Evaluation
train_accuracy = mlp.score(X_train, Y_train)
test_accuracy = mlp.score(X_test, Y_test)

print("Training Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)


Training Accuracy: 0.9868421052631579
Test Accuracy: 0.9824561403508771


