Reference: https://towardsdatascience.com/lets-code-a-neural-network-in-plain-numpy-ae7e74410795

In [1]:
import numpy as np
import pandas as pd

# Neural Network Architecture
<img src="NeuralNetwork.png" width="600" alt="Neural network architecture diagram">

In [2]:
# Layer-by-layer description of the network: each entry gives the number of
# inputs, the number of units (outputs) and the activation name of one layer.
nn_architecture = [
    dict(input_dim=5, output_dim=5, activation="relu"),
    dict(input_dim=5, output_dim=3, activation="sigmoid"),
]

In [3]:
def init_layers(nn_architecture, seed = 99):
    """Create the initial weights and biases for every layer.

    Parameters
    ----------
    nn_architecture : list of dict
        One dict per layer; the keys "input_dim" and "output_dim" are read.
    seed : int, optional
        Value passed to ``np.random.seed`` so the initialisation is
        reproducible. Note that this reseeds NumPy's global generator.

    Returns
    -------
    dict
        Maps "W<k>" to an (output_dim, input_dim) array and "b<k>" to an
        (output_dim, 1) array, with k counting layers from 1. Values are
        standard-normal draws scaled by 0.1.
    """
    np.random.seed(seed)
    params_values = {}

    # Layers are numbered from 1 so the keys read W1, b1, W2, b2, ...
    for layer_idx, layer in enumerate(nn_architecture, start=1):
        layer_input_size = layer["input_dim"]
        layer_output_size = layer["output_dim"]

        # W is drawn before b for each layer; keeping this order preserves
        # the exact values produced for a given seed.
        params_values['W' + str(layer_idx)] = np.random.randn(
            layer_output_size, layer_input_size) * 0.1
        params_values['b' + str(layer_idx)] = np.random.randn(
            layer_output_size, 1) * 0.1

    return params_values

In [4]:
# Display the initial parameters generated with the default seed (99).
init_layers(nn_architecture)

{'W1': array([[-0.01423588,  0.20572217,  0.02832619,  0.1329812 , -0.01546219],
        [-0.00690309,  0.07551805,  0.08256466, -0.01130692, -0.23678376],
        [-0.01670494,  0.0685398 ,  0.00235001,  0.04562013,  0.02704928],
        [-0.14350081,  0.08828171, -0.05800817, -0.05015653,  0.05909533],
        [-0.07316163,  0.02617555, -0.08557956, -0.01875259, -0.03734863]]),
 'W2': array([[-0.05738197,  0.00527031,  0.22073106,  0.03918219,  0.04827134],
        [ 0.0433334 , -0.17042917, -0.02439081, -0.21397038,  0.08613227],
        [ 0.17002844, -0.05287848,  0.17634779, -0.11216078, -0.11919342]]),
 'b1': array([[-0.0461971 ],
        [-0.08164661],
        [-0.00451233],
        [ 0.01213278],
        [ 0.09259528]]),
 'b2': array([[ 0.05527319],
        [-0.08159809],
        [-0.04966468]])}

In [5]:
def sigmoid(Z):
    """Logistic function 1 / (1 + exp(-Z)), applied element-wise."""
    denominator = 1 + np.exp(-Z)
    return 1 / denominator

def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and Z."""
    lower_bound = 0
    return np.maximum(lower_bound, Z)

def sigmoid_backward(dA, Z):
    """Back-propagate dA through a sigmoid activation.

    Returns dA * s * (1 - s) with s = sigmoid(Z), i.e. dA scaled by the
    derivative of the sigmoid evaluated at Z.
    """
    activation = sigmoid(Z)
    scaled = dA * activation
    return scaled * (1 - activation)

def relu_backward(dA, Z):
    """Back-propagate dA through a ReLU activation.

    Parameters
    ----------
    dA : array_like
        Gradient with respect to the activation output.
    Z : ndarray
        Pre-activation values; must be usable as a mask on dA.

    Returns
    -------
    ndarray
        A copy of dA with entries zeroed wherever Z <= 0. The input dA is
        not modified.
    """
    # Work on a copy so the caller's gradient array is left untouched.
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    return dZ


In [6]:
def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation="relu"):
    """Run the forward pass of one layer.

    Computes Z = W_curr . A_prev + b_curr and applies the requested
    activation function to it.

    Parameters
    ----------
    A_prev : ndarray
        Activations fed into this layer.
    W_curr : ndarray
        Weight matrix of this layer.
    b_curr : ndarray
        Bias of this layer (added to the matrix product via broadcasting).
    activation : str, optional
        Either "relu" or "sigmoid".

    Returns
    -------
    tuple
        (A_curr, Z_curr): the activated output and the pre-activation value.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names.
    """
    Z_curr = np.dot(W_curr, A_prev) + b_curr

    # Compare by value (==), not identity (is): two equal strings are not
    # guaranteed to be the same object, e.g. when the name is read from a
    # file or built at run time.
    if activation == "relu":
        activation_func = relu
    elif activation == "sigmoid":
        activation_func = sigmoid
    else:
        # ValueError is a subclass of Exception, so existing handlers that
        # catch Exception keep working.
        raise ValueError('Non-supported activation function')

    return activation_func(Z_curr), Z_curr

In [7]:
def full_forward_propagation(X, params_values, nn_architecture):
    """Propagate X through every layer of the network.

    For each layer k (counted from 1) the input activation is stored in the
    returned cache under "A<k-1>" and the pre-activation value under "Z<k>".

    Returns
    -------
    tuple
        (output of the last layer, cache dict described above).
    """
    cache = {}
    activation = X

    for position, layer_spec in enumerate(nn_architecture):
        number = position + 1
        layer_input = activation

        weights = params_values["W" + str(number)]
        bias = params_values["b" + str(number)]
        activation, pre_activation = single_layer_forward_propagation(
            layer_input, weights, bias, layer_spec["activation"])

        # Keep what the backward pass needs: this layer's input and its Z.
        cache["A" + str(position)] = layer_input
        cache["Z" + str(number)] = pre_activation

    return activation, cache

In [8]:
def get_cost_value(Y_hat, Y):
    """Binary cross-entropy cost, averaged over the examples.

    Parameters
    ----------
    Y_hat : ndarray
        Predicted probabilities; ``Y_hat.shape[1]`` is taken as the number
        of examples m.
    Y : ndarray
        Targets, broadcastable against Y_hat.

    Returns
    -------
    numpy scalar / 0-d array
        -1/m * sum(Y * log(Y_hat) + (1 - Y) * log(1 - Y_hat)), summed over
        every output unit and example.

    Notes
    -----
    The sum is element-wise. A matrix product ``np.dot(Y, log(Y_hat).T)``
    only gives the same value for a single output row; with several output
    units it produces an (n_out, n_out) matrix that mixes targets of one
    unit with predictions of another.
    """
    m = Y_hat.shape[1]
    log_likelihood = Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat)
    cost = -np.sum(log_likelihood) / m
    return np.squeeze(cost)

def get_accuracy_value(Y_hat, Y):
    """Fraction of examples (columns) whose predicted classes all match Y.

    Y_hat is first passed through ``convert_prob_into_class`` (defined
    outside this cell; presumably it thresholds probabilities into class
    labels - confirm against its definition). A column counts as correct
    only when every row of that column equals the target.
    """
    predicted_classes = convert_prob_into_class(Y_hat)
    matches = predicted_classes == Y
    return matches.all(axis=0).mean()

In [9]:
def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation="relu"):
    """Run the backward pass of one layer.

    Parameters
    ----------
    dA_curr : ndarray
        Gradient of the cost with respect to this layer's activation.
    W_curr : ndarray
        Weight matrix of this layer.
    b_curr : ndarray
        Bias of this layer. Not used in the computation; kept so the
        signature mirrors the forward pass.
    Z_curr : ndarray
        Pre-activation values saved during the forward pass.
    A_prev : ndarray
        Input activations of this layer; ``A_prev.shape[1]`` is taken as
        the number of examples m.
    activation : str, optional
        Either "relu" or "sigmoid".

    Returns
    -------
    tuple
        (dA_prev, dW_curr, db_curr): gradients with respect to the layer
        input, the weights and the bias. dW_curr and db_curr are averaged
        over the m examples.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names.
    """
    m = A_prev.shape[1]

    # Compare by value (==), not identity (is): equal strings are not
    # guaranteed to be the same object.
    if activation == "relu":
        backward_activation_func = relu_backward
    elif activation == "sigmoid":
        backward_activation_func = sigmoid_backward
    else:
        # ValueError is a subclass of Exception, so existing handlers that
        # catch Exception keep working.
        raise ValueError('Non-supported activation function')

    dZ_curr = backward_activation_func(dA_curr, Z_curr)
    dW_curr = np.dot(dZ_curr, A_prev.T) / m
    # keepdims=True keeps db as a column so it matches the bias shape.
    db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m
    dA_prev = np.dot(W_curr.T, dZ_curr)

    return dA_prev, dW_curr, db_curr

In [10]:
def full_backward_propagation(Y_hat, Y, memory, params_values, nn_architecture):
    """Back-propagate the cross-entropy cost through every layer.

    Parameters
    ----------
    Y_hat : ndarray
        Network output from the forward pass.
    Y : ndarray
        Targets; reshaped to ``Y_hat.shape`` before use, so any array with
        the same number of elements is accepted.
    memory : dict
        Cache from the forward pass holding "A<k-1>" and "Z<k>" per layer k.
    params_values : dict
        Current parameters keyed "W<k>" / "b<k>".
    nn_architecture : list of dict
        Layer descriptions; the "activation" key is read.

    Returns
    -------
    dict
        Gradients keyed "dW<k>" and "db<k>" for every layer k (from 1).
    """
    grads_values = {}
    Y = Y.reshape(Y_hat.shape)

    # Derivative of the binary cross-entropy with respect to Y_hat; this
    # seeds the backward pass at the output layer.
    dA_prev = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat))

    # Walk the layers from last to first.
    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        layer_idx_curr = layer_idx_prev + 1
        activ_function_curr = layer["activation"]

        # The gradient handed back by the layer above is this layer's dA.
        dA_curr = dA_prev

        A_prev = memory["A" + str(layer_idx_prev)]
        Z_curr = memory["Z" + str(layer_idx_curr)]
        W_curr = params_values["W" + str(layer_idx_curr)]
        b_curr = params_values["b" + str(layer_idx_curr)]

        dA_prev, dW_curr, db_curr = single_layer_backward_propagation(
            dA_curr, W_curr, b_curr, Z_curr, A_prev, activ_function_curr)

        grads_values["dW" + str(layer_idx_curr)] = dW_curr
        grads_values["db" + str(layer_idx_curr)] = db_curr

    return grads_values

In [11]:
def update(params_values, grads_values, nn_architecture, learning_rate):
    """Apply one gradient-descent step to every layer's parameters.

    Parameters
    ----------
    params_values : dict
        Parameters keyed "W<k>" / "b<k>" with k counted from 1. The arrays
        are updated in place.
    grads_values : dict
        Gradients keyed "dW<k>" / "db<k>".
    nn_architecture : list
        Only its length is used, to know how many layers to update.
    learning_rate : float
        Step size multiplying each gradient.

    Returns
    -------
    dict
        The same ``params_values`` object, after the update.
    """
    # Layers are numbered from 1 (W1, b1, ...), so enumeration must start
    # at 1; starting at 0 would look up the non-existent key "W0".
    for layer_idx, _ in enumerate(nn_architecture, 1):
        params_values["W" + str(layer_idx)] -= learning_rate * grads_values["dW" + str(layer_idx)]
        params_values["b" + str(layer_idx)] -= learning_rate * grads_values["db" + str(layer_idx)]

    return params_values

In [12]:
def train(X, Y, nn_architecture, epochs, learning_rate):
    """Train the network with full-batch gradient descent.

    Parameters are initialised with ``init_layers(nn_architecture, 2)``.
    Each epoch runs a forward pass, records cost and accuracy, runs the
    backward pass and applies one parameter update.

    Returns
    -------
    tuple
        (params_values, cost_history, accuracy_history); the two histories
        hold one entry per epoch, measured before that epoch's update.
    """
    params_values = init_layers(nn_architecture, 2)
    cost_history, accuracy_history = [], []

    for _ in range(epochs):
        Y_hat, cache = full_forward_propagation(X, params_values, nn_architecture)

        # Metrics are computed from the predictions made before the update.
        cost_history.append(get_cost_value(Y_hat, Y))
        accuracy_history.append(get_accuracy_value(Y_hat, Y))

        grads_values = full_backward_propagation(Y_hat, Y, cache, params_values, nn_architecture)
        params_values = update(params_values, grads_values, nn_architecture, learning_rate)

    return params_values, cost_history, accuracy_history

# Preparing Dataset

In [56]:
def load_dataset(fname):
    """Read the CSV at ``fname`` and rescale its composition and temperature columns.

    Lines starting with '#' are treated as comments by the reader.
    Columns C, Mn, Si, Cr and Ni are multiplied by 100 (to wt.%, presumably
    from weight fractions), and columns A1, A1prime and A3 have 273.15
    subtracted (to degrees Celsius, presumably from kelvin).

    Returns the resulting DataFrame.
    """
    dataset = pd.read_csv(fname, comment='#')

    # compositions to wt.%
    for element in ('C', 'Mn', 'Si', 'Cr', 'Ni'):
        dataset[element] = dataset[element] * 100

    # temperatures to degrees Celsius
    for temperature in ('A1', 'A1prime', 'A3'):
        dataset[temperature] = dataset[temperature] - 273.15

    return dataset

In [57]:
# Load the table and drop the 'file' and 'macro' columns in a single chained
# expression, so re-running the cell always rebuilds raw_df from the CSV.
raw_df = load_dataset('../../databases/Tcriticalcopy.csv').drop(columns=['file', 'macro'])
raw_df

Unnamed: 0,C,Mn,Si,Cr,Ni,A1,A1prime,A3,eutectoid
0,0.0,0.0001,0.000100,0.000100,0.000100,,,911.65,hipo
1,0.0,0.0001,0.000100,0.000100,0.750075,,,880.26,hipo
2,0.0,0.0001,0.000100,0.000100,1.500050,,,853.25,hipo
3,0.0,0.0001,0.000100,0.000100,2.250025,,,829.60,hipo
4,0.0,0.0001,0.000100,0.000100,3.000000,,,808.65,hipo
5,0.0,0.0001,0.000100,0.750075,0.000100,,,900.04,hipo
6,0.0,0.0001,0.000100,0.750075,0.750075,,,869.92,hipo
7,0.0,0.0001,0.000100,0.750075,1.500050,,,843.92,hipo
8,0.0,0.0001,0.000100,0.750075,2.250025,,,821.11,hipo
9,0.0,0.0001,0.000100,0.750075,3.000000,,,800.86,hipo
