In [87]:
import numpy as np
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

# Set random seed for reproducibility
np.random.seed(42)

# Create training, validation, and test datasets
N_train = 500
N_test = 100
X, y = make_moons(N_train + N_test, noise=0.20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=N_test, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [81]:
myNN = myNeuralNetwork(n_in=2, n_layer1=5, n_layer2=5, n_out=1, learning_rate=0.01)

# Train the model and collect the cost values for each epoch
training_loss, validation_loss = myNN.fit(X_train, y_train, max_epochs=1000, learning_rate=0.01, get_validation_loss=True)



ValueError: shapes (5,2) and (400,2) not aligned: 2 (dim 1) != 400 (dim 0)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Train the model on the combined training and validation sets with the best hyperparameters
clf = myNeuralNetwork(n_in=2, n_layer1=5, n_layer2=5, n_out=1, learning_rate=0.1)
clf.fit(np.concatenate([X_train, X_val]), np.concatenate([y_train, y_val]),max_epochs=2000)

# Create a 2x1 subplot
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))

# Plot the decision boundary on the training data
scatter0 = axs[0].scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=plt.cm.Spectral)
axs[0].set_title('Decision boundary for training data')
xx, yy = np.meshgrid(np.linspace(-2, 3, 100), np.linspace(-2, 2, 100))
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
contour0 = axs[0].contourf(xx, yy, Z, levels=[0, 0.5, 1], alpha=0.5, cmap=plt.cm.Spectral)
axs[0].contour(xx, yy, Z, levels=[0.5], colors='k')
axs[0].set_xticks([])
axs[0].set_yticks([])

# Plot the decision boundary on the validation data
scatter1 = axs[1].scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=plt.cm.Spectral)
axs[1].set_title('Decision boundary for validation data')
contour1 = axs[1].contourf(xx, yy, Z, levels=[0, 0.5, 1], alpha=0.5, cmap=plt.cm.Spectral)
axs[1].contour(xx, yy, Z, levels=[0.5], colors='k')
axs[1].set_xticks([])
axs[1].set_yticks([])

# Create legend
handles = [scatter0.legend_elements()[0][0], scatter0.legend_elements()[0][1]]
labels = ['Class 1', 'Class 0']
fig.legend(handles, labels, loc='center')

plt.show()

In [92]:
nn_architecture = [
    {"input_dim": 2, "output_dim": 4, "activation": "relu"},
    {"input_dim": 4, "output_dim": 6, "activation": "relu"},
    {"input_dim": 6, "output_dim": 6, "activation": "relu"},
    {"input_dim": 6, "output_dim": 4, "activation": "relu"},
    {"input_dim": 4, "output_dim": 1, "activation": "sigmoid"},
]

def init_layers(nn_architecture, seed = 99):
    np.random.seed(seed)
    number_of_layers = len(nn_architecture)
    params_values = {}

    for idx, layer in enumerate(nn_architecture):
        layer_idx = idx + 1
        layer_input_size = layer["input_dim"]
        layer_output_size = layer["output_dim"]
        
        params_values['W' + str(layer_idx)] = np.random.randn(
            layer_output_size, layer_input_size) * 0.1
        params_values['b' + str(layer_idx)] = np.random.randn(
            layer_output_size, 1) * 0.1
        
    return params_values


def sigmoid(Z):
    return 1/(1+np.exp(-Z))

def relu(Z):
    return np.maximum(0,Z)

def sigmoid_backward(dA, Z):
    sig = sigmoid(Z)
    return dA * sig * (1 - sig)

def relu_backward(dA, Z):
    dZ = np.array(dA, copy = True)
    dZ[Z <= 0] = 0;
    return dZ;

def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation="relu"):
    Z_curr = np.dot(W_curr, A_prev) + b_curr
    
    if activation is "relu":
        activation_func = relu
    elif activation is "sigmoid":
        activation_func = sigmoid
    else:
        raise Exception('Non-supported activation function')
        
    return activation_func(Z_curr), Z_curr


def full_forward_propagation(X, params_values, nn_architecture):
    memory = {}
    A_curr = X
    
    for idx, layer in enumerate(nn_architecture):
        layer_idx = idx + 1
        A_prev = A_curr
        
        activ_function_curr = layer["activation"]
        W_curr = params_values["W" + str(layer_idx)]
        b_curr = params_values["b" + str(layer_idx)]
        A_curr, Z_curr = single_layer_forward_propagation(A_prev, W_curr, b_curr, activ_function_curr)
        
        memory["A" + str(idx)] = A_prev
        memory["Z" + str(layer_idx)] = Z_curr
       
    return A_curr, memory


def get_cost_value(Y_hat, Y):
    m = Y_hat.shape[1]
    cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T))
    return np.squeeze(cost)

def get_accuracy_value(Y_hat, Y):
    Y_hat_ = convert_prob_into_class(Y_hat)
    return (Y_hat_ == Y).all(axis=0).mean()


def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation="relu"):
    m = A_prev.shape[1]
    
    if activation is "relu":
        backward_activation_func = relu_backward
    elif activation is "sigmoid":
        backward_activation_func = sigmoid_backward
    else:
        raise Exception('Non-supported activation function')
    
    dZ_curr = backward_activation_func(dA_curr, Z_curr)
    dW_curr = np.dot(dZ_curr, A_prev.T) / m
    db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m
    dA_prev = np.dot(W_curr.T, dZ_curr)

    return dA_prev, dW_curr, db_curr

def full_backward_propagation(Y_hat, Y, memory, params_values, nn_architecture):
    grads_values = {}
    m = Y.shape[1]
    Y = Y.reshape(Y_hat.shape)
   
    dA_prev = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat));
    
    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        layer_idx_curr = layer_idx_prev + 1
        activ_function_curr = layer["activation"]
        
        dA_curr = dA_prev
        
        A_prev = memory["A" + str(layer_idx_prev)]
        Z_curr = memory["Z" + str(layer_idx_curr)]
        W_curr = params_values["W" + str(layer_idx_curr)]
        b_curr = params_values["b" + str(layer_idx_curr)]
        
        dA_prev, dW_curr, db_curr = single_layer_backward_propagation(
            dA_curr, W_curr, b_curr, Z_curr, A_prev, activ_function_curr)
        
        grads_values["dW" + str(layer_idx_curr)] = dW_curr
        grads_values["db" + str(layer_idx_curr)] = db_curr
    
    return grads_values

def update(params_values, grads_values, nn_architecture, learning_rate):
    for layer_idx, layer in enumerate(nn_architecture):
        params_values["W" + str(layer_idx)] -= learning_rate * grads_values["dW" + str(layer_idx)]        
        params_values["b" + str(layer_idx)] -= learning_rate * grads_values["db" + str(layer_idx)]

    return params_values;

def train(X, Y, nn_architecture, epochs, learning_rate):
    params_values = init_layers(nn_architecture, 2)
    cost_history = []
    accuracy_history = []
    
    for i in range(epochs):
        Y_hat, cashe = full_forward_propagation(X, params_values, nn_architecture)
        cost = get_cost_value(Y_hat, Y)
        cost_history.append(cost)
        accuracy = get_accuracy_value(Y_hat, Y)
        accuracy_history.append(accuracy)
        
        grads_values = full_backward_propagation(Y_hat, Y, cashe, params_values, nn_architecture)
        params_values = update(params_values, grads_values, nn_architecture, learning_rate)
        
    return params_values, cost_history, accuracy_history


# test the model
params_values, cost_history, accuracy_history = train(X_train, y_train, nn_architecture, 1000, 0.01)


  if activation is "relu":
  elif activation is "sigmoid":
  if activation is "relu":
  elif activation is "sigmoid":
  if activation is "relu":
  elif activation is "sigmoid":
  if activation is "relu":
  elif activation is "sigmoid":
  if activation is "relu":
  elif activation is "sigmoid":
  if activation is "relu":
  elif activation is "sigmoid":
  if activation is "relu":
  elif activation is "sigmoid":
  if activation is "relu":
  elif activation is "sigmoid":
  if activation is "relu":
  elif activation is "sigmoid":
  if activation is "relu":
  elif activation is "sigmoid":
  if activation is "relu":
  elif activation is "sigmoid":
  if activation is "relu":
  elif activation is "sigmoid":
  if activation is "relu":
  elif activation is "sigmoid":
  if activation is "relu":
  elif activation is "sigmoid":


ValueError: shapes (4,2) and (400,2) not aligned: 2 (dim 1) != 400 (dim 0)