In [443]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits

In [444]:
def init_layers(nn_architecture):
    # random seed initiation
    np.random.seed(42)
    # number of layers in our neural network
    number_of_layers = len(nn_architecture)
    # parameters storage initiation
    params_values = {}
    
    # iteration over network layers
    for id_x, layer in enumerate(nn_architecture):
        # count network layers from 1
        layer_id = id_x + 1
        
        # extracting the number of units in layers
        layer_input_size = layer["input_dim"]
        layer_output_size = layer["output_dim"]
        
        # initiating the values of the W matrix
        # and vector b for subsequent layers
        params_values['W' + str(layer_id)] = np.random.randn(layer_output_size, layer_input_size) * 0.1
        params_values['b' + str(layer_id)] = np.random.randn(layer_output_size, 1) * 0.1
        
    return params_values

In [445]:
def sigmoid(Z):
    return 1/(1+np.exp(-Z))

def sigmoid_backward(dA, Z):
    sig = sigmoid(Z)
    return dA * sig * (1 - sig)

def softmax(Z):
    Z = np.exp(Z - np.max(Z))
    return Z / np.sum(np.exp(Z)) 

def softmax_backward(dA, Z):
    softm = softmax(Z)
    return dA * softm * (1 - softm)

def relu(Z):
    return np.maximum(0, Z)

def relu_backward(dA, Z):
    dZ = np.array(dA, copy = True)
    dZ[Z <= 0] = 0
    dZ[Z > 0] = 1
    return dZ

In [446]:
def forward_propagation(X, params_values, nn_architecture):
    # creating a temporary memory to store the information needed for a backward step
    memory = {}
    # X vector is the activation for layer 0 
    A_curr = X
    
    # iteration over network layers
    for idx, layer in enumerate(nn_architecture):
        # we number network layers from 1
        layer_idx = idx + 1
        # transfer the activation from the previous iteration
        A_prev = A_curr
        
        # extraction of the activation function for the current layer
        activ_function_curr = layer["activation"]
        # extraction of W for the current layer
        W_curr = params_values["W" + str(layer_idx)]
        # extraction of b for the current layer
        b_curr = params_values["b" + str(layer_idx)]
        
        # calculation of activation for the current layer
        Z_curr = np.dot(W_curr, A_prev) + b_curr
        A_curr = eval('{}(Z_curr)'.format(activ_function_curr))
        
        # saving calculated values in the memory
        memory["A" + str(idx)] = A_prev
        memory["Z" + str(layer_idx)] = Z_curr
        
    # return predicted & saved values
    return A_curr, memory

In [447]:
def backward_propagation(Y_hat, Y, memory, params_values, nn_architecture):
    grads_values = {}
    
    # number of examples
    m = Y.shape[1]
    # a hack ensuring the same shape of the prediction vector and labels vector
    Y = Y.reshape(Y_hat.shape)
    
    # initiation of gradient descent algorithm
    dA_prev = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat));
    
    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        # we number network layers from 1
        layer_idx_curr = layer_idx_prev + 1
        # extraction of the activation function for the current layer
        activ_function_curr = layer["activation"]
        
        dA_curr = dA_prev
        
        A_prev = memory["A" + str(layer_idx_prev)]
        Z_curr = memory["Z" + str(layer_idx_curr)]
        
        W_curr = params_values["W" + str(layer_idx_curr)]
        b_curr = params_values["b" + str(layer_idx_curr)]
        
        m_previous = A_prev.shape[1]

        # calculation of the activation function derivative
        dZ_curr = eval('{}_backward(dA_curr, Z_curr)'.format(activ_function_curr))

        # derivative of the weights matrix 
        dW_curr = np.dot(dZ_curr, A_prev.T) / m_previous
        # derivative of the bias vector
        db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m_previous
        # derivative of the previous layer activated matrix
        dA_prev = np.dot(W_curr.T, dZ_curr)
        
        grads_values["dW" + str(layer_idx_curr)] = dW_curr
        grads_values["db" + str(layer_idx_curr)] = db_curr
    
    return grads_values

In [543]:
def get_cost_value(p, q):
    # number of examples
#     m = Y_hat.shape[0]
    # calculation of the cost according to the formula
#     cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T))
    cost = -sum([p[i]*np.log(q[i]) for i in range(len(p))])
#     print (np.squeeze(cost))
    print (cost)
    return cost

In [544]:
# an auxiliary function that converts probability into class
def convert_prob_into_class(probs):
    probs_ = np.copy(probs)
    
    probs_[probs_ < 0.1] = 0
    probs_[(probs_ >= 0.1) & (probs_ < 0.2)] = 1
    probs_[(probs_ >= 0.2) & (probs_ < 0.3)] = 2
    probs_[(probs_ >= 0.3) & (probs_ < 0.4)] = 3
    probs_[(probs_ >= 0.4) & (probs_ < 0.5)] = 4
    probs_[(probs_ >= 0.5) & (probs_ < 0.6)] = 5
    probs_[(probs_ >= 0.6) & (probs_ < 0.7)] = 6
    probs_[(probs_ >= 0.7) & (probs_ < 0.8)] = 7
    probs_[(probs_ >= 0.8) & (probs_ < 0.9)] = 8
    probs_[probs_ >= 0.9] = 9
    
    return probs_

In [545]:
def get_accuracy_value(y_hat, y):
#     Y_hat_ = convert_prob_into_class(Y_hat)
#     return (Y_hat_ == Y).all(axis=0).mean()
    if y_hat.shape[1] > 1:
        return float((y_hat.argmax(axis=1).astype('float32') == y.astype(
            'float32')).sum())
    else:
        return float((y_hat.astype('int32') == y.astype('int32')).sum())

In [546]:
def update(params_values, grads_values, nn_architecture, learning_rate):

    # iteration over network layers
    for layer_idx, layer in enumerate(nn_architecture, 1):
        params_values["W" + str(layer_idx)] -= learning_rate * grads_values["dW" + str(layer_idx)]        
        params_values["b" + str(layer_idx)] -= learning_rate * grads_values["db" + str(layer_idx)]

    return params_values;

In [547]:
def train(X, Y, nn_architecture, epochs, learning_rate):
    # initiation of neural net parameters
    params_values = init_layers(nn_architecture)
    # initiation of lists storing the history 
    # of metrics calculated during the learning process 
    cost_history = []
    accuracy_history = []
    
    # performing calculations for subsequent iterations
    for i in range(epochs):
        # step forward
        Y_hat, cashe = forward_propagation(X, params_values, nn_architecture)
        
        # calculating metrics and saving them in history
        cost = get_cost_value(Y_hat, Y)
        cost_history.append(cost)
        accuracy = get_accuracy_value(Y_hat, Y)
        accuracy_history.append(accuracy)
        
        # calculating gradient
        grads_values = backward_propagation(Y_hat, Y, cashe, params_values, nn_architecture)
        # updating model state
        params_values = update(params_values, grads_values, nn_architecture, learning_rate)
        
        if (i % 50 == 0):
            print("Iteration: {} - cost: {:.5f} - accuracy: {:.5f}".format(i, cost, accuracy))
            
    return params_values, cost_history

In [548]:
NN_ARCHITECTURE = [
    {"input_dim": 64, "output_dim": 120, "activation": "relu"},
    {"input_dim": 120, "output_dim": 200, "activation": "relu"},
    {"input_dim": 200, "output_dim": 200, "activation": "relu"},
    {"input_dim": 200, "output_dim": 1, "activation": "softmax"}
#     {"input_dim": 200, "output_dim": 1, "activation": "softmax"},
]

In [549]:
data = load_digits()

In [550]:
X = data.data
X = X.astype('float64')
X /= 255.
X.max()

0.06274509803921569

In [551]:
y = data.target

In [552]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [553]:
print (X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(1437, 64) (360, 64) (1437,) (360,)


In [554]:
params_values, cost_history = train(np.transpose(X_train), np.transpose(y_train.reshape((y_train.shape[0], 1))), NN_ARCHITECTURE, 30000, 0.001)

[-0.00044995         inf         inf ... -0.00018037 -0.00050178
 -0.        ]


  


TypeError: unsupported format string passed to numpy.ndarray.__format__

In [None]:
Y_test_hat, _ = forward_propagation(np.transpose(X_test), params_values, NN_ARCHITECTURE)

In [542]:
acc_test = get_accuracy_value(Y_test_hat, np.transpose(y_test.reshape((y_test.shape[0], 1))))
print("Test accuracy: {:.5f}".format(acc_test))

Test accuracy: 0.00000


In [None]:
n_epochs = np.arange(1000)

In [None]:
plt.plot(n_epochs, cost_history)