## A simple notebook to test some functions

#### Activation functions: sigmoid and ReLU

Sigmoid and its derivative

In [309]:
import numpy as np

def sigmoid(x):

    s = 1/(1 + np.exp(-x))

    return s

def derivative_sigmoid(s):

    ds = s*(1-s)

    return ds

w = [1,2]
x = [2,4]
b = 3
z = np.dot(w,x) + b
a = sigmoid(z)
da = derivative_sigmoid(a)
print(a,da)


0.999997739675702 2.260319188887599e-06


ReLU and its derivative

In [310]:
def relu(x):
    return x * (x > 0)

def derivative_relu(x):
    return 1 * (x>0)

Now let's implement the softmax function

In [311]:
def softmax(vector):
    e = np.exp(vector)
    return e / e.sum()

In [312]:
def derivative_softmax(vector):
    s = softmax(vector)
    s = s.reshape(-1, 1)

    return np.diagflat(s) - np.dot(s, s.T)

### Implement a fully parametrizable neural network class

In [313]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
iris = load_iris(as_frame=True)
data = pd.DataFrame(iris.data, columns=iris.feature_names)
data['species'] = iris.target
data


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [314]:
from sklearn.model_selection import train_test_split
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test= train_test_split(X, y, test_size=0.2,random_state=42)

## Create a NN class

### Forward Pass

In [315]:
from sklearn.model_selection import train_test_split
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test= train_test_split(X, y, test_size=0.2,random_state=42)

# just 4 samples
X = np.array(X_train)

# target values 
y = np.array(y_train).T 

class NeuralNetwork:

    #hidden layer (sigmoid, relu)
    #output layer (sigmoid, softmax)

    def __init__(self, activation_function, no_of_input_nodes, no_of_hidden_nodes, no_of_output_nodes, n_epochs, activation_output):
        self.hidden_layers = len(no_of_hidden_nodes)
        self.activation_function = activation_function
        self.no_of_input_nodes = no_of_input_nodes # as many as the dataset's features
        self.no_of_hidden_nodes = no_of_hidden_nodes # no fixed number, needs tuning
        self.no_of_output_nodes = no_of_output_nodes # as many as the output classes
        self.n_epochs = n_epochs
        self.activation_output = activation_output #activation function of output layer (softmax/sigmoid)

        self.weights, self.biases = self.weights_and_bias()

    def weights_and_bias(self):
        layers = [self.no_of_input_nodes] + self.no_of_hidden_nodes + [self.no_of_output_nodes] #e.g. [2,3,5,2]
        weights = [] #TODO: weight and bias proper initialization (xavier)
        biases = []
        for i in range(len((layers))-1):

            n_in = layers[i]
            n_out = layers[i+1]

            weights.append(2*np.random.random((n_in,n_out)) - 1)
            biases.append(np.zeros((1, n_out)))
        return weights, biases

    
    def forward_pass(self, X):

        #first hidden layer needs to have the actual data
        #all other hidden layers take the result from the previous layer
        a = X
        for layer in range(self.hidden_layers):

            W = self.weights[layer]
            b = self.biases[layer]
            z = np.dot(a, W) + b

            if self.activation_function =='sigmoid':
                a = sigmoid(z)
            elif self.activation_function =='relu':
                a = relu(z)
            print(f'Shape of layer: {a.shape}')

        #---Output Layer---
        W = self.weights[-1]
        b = self.biases[-1]
        z = np.dot(a, W) + b
        if self.activation_output == 'sigmoid':
            a = sigmoid(z)
        elif self.activation_output == 'softmax':
            a = softmax(z)
        print(f'Shape of output layer: {a.shape}')
        return a
        #TODO: compute loss



In [316]:
#TODO: run the forward pass in epochs
#n_epochs = 500
#for epoch in range(n_epochs):
test = NeuralNetwork('sigmoid',4,[3,4],3,500,'softmax')
output = test.forward_pass(X_train)

#backward_pass_hidden_layer(self)
#backward_pass_output_layer(self)

Shape of layer: (120, 3)
Shape of layer: (120, 4)
Shape of output layer: (120, 3)


In [317]:
y_pred = test.forward_pass(X_train)
print(y_pred.shape)



Shape of layer: (120, 3)
Shape of layer: (120, 4)
Shape of output layer: (120, 3)
(120, 3)


In [318]:
print(np.min(y_pred), np.max(y_pred))
print(np.sum(y_pred, axis=1)[:5])


0.0008592303590349501 0.005517008648874792
[0.00844186 0.00846027 0.00833157 0.00842607 0.00841794]


In [330]:
import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def relu(z):
    return np.maximum(0, z)

def softmax(z):
    # z: (N, num_classes)
    shifted = z - np.max(z, axis=1, keepdims=True)
    e = np.exp(shifted)
    return e / np.sum(e, axis=1, keepdims=True)


class NeuralNetwork:

    #hidden layer (sigmoid, relu)
    #output layer (sigmoid, softmax)

    def __init__(self, activation_function, no_of_input_nodes,
                 no_of_hidden_nodes, no_of_output_nodes,
                 n_epochs, activation_output, learning_rate=0.01):    # CHANGED learning_rate=0.01
        self.hidden_layers = len(no_of_hidden_nodes)
        self.activation_function = activation_function
        self.no_of_input_nodes = no_of_input_nodes
        self.no_of_hidden_nodes = no_of_hidden_nodes
        self.no_of_output_nodes = no_of_output_nodes
        self.n_epochs = n_epochs
        self.activation_output = activation_output
        self.learning_rate = learning_rate     # CHANGED

        self.weights, self.biases = self.weights_and_bias()

    def weights_and_bias(self):
        layers = [self.no_of_input_nodes] + self.no_of_hidden_nodes + [self.no_of_output_nodes]
        weights = []
        biases = []
        for i in range(len(layers) - 1):
            n_in = layers[i]
            n_out = layers[i+1]
            W = 2*np.random.random((n_in, n_out)) - 1
            b = np.zeros((1, n_out))
            weights.append(W)
            biases.append(b)
        return weights, biases

    def forward_pass(self, X):


        # create empty lists to store cache for backward
        self.activations = []          # CHANGED
        self.zs = []                   # CHANGED


        
        a = X
        self.activations.append(a)     # CHANGED: store a0

        
        for layer in range(self.hidden_layers):
            W = self.weights[layer]
            b = self.biases[layer]
            z = np.dot(a, W) + b

            self.zs.append(z)          # CHANGED: store z^{l+1}

            if self.activation_function == 'sigmoid':
                a = sigmoid(z)
            elif self.activation_function == 'relu':
                a = relu(z)
            else:
                raise ValueError("Unsupported hidden activation")

            self.activations.append(a) # CHANGED: store a^{l+1}

        
        # output layer
        W = self.weights[-1]
        b = self.biases[-1]
        z = np.dot(a, W) + b

        self.zs.append(z)              # CHANGED: store z^L

        
        if self.activation_output == 'sigmoid':
            a = sigmoid(z)
        elif self.activation_output == 'softmax':
            a = softmax(z)
        else:
            raise ValueError("Unsupported output activation")

        self.activations.append(a)     # CHANGED: store a^L
        
        return a


        



    def compute_loss(self, y_pred, y_true):
    
        # number of samples
        N = y_true.shape[0]
        
        correct_probs = y_pred[np.arange(N), y_true]
    
        # loss = average of -log(p)
        loss = -np.sum(np.log(correct_probs)) / N
        return loss
    
    def backward(self, X, y_true, y_pred):

        
        N = X.shape[0]   # number of samples
    
        # make empty lists for gradients
        dW = [None] * len(self.weights)
        db = [None] * len(self.biases)
    
        # output layer gradient
        
        y_true = y_true.astype(int)
    
        # dZ for softmax + cross entropy
        dZ = y_pred.copy()
        dZ[np.arange(N), y_true] -= 1
        dZ = dZ / N
    
        # index of last layer
        L = len(self.weights) - 1
    
        # gradients for last layer
        # activations[L] = a^{L-1}
        dW[L] = np.dot(self.activations[L].T, dZ)
        db[L] = np.sum(dZ, axis=0, keepdims=True)
    
        # dA for the layer below
        dA = np.dot(dZ, self.weights[L].T)
    
        # hidden layers (from last to first)
        
        for l in range(self.hidden_layers - 1, -1, -1):
    
            # z^{l+1} and a^{l+1}
            z = self.zs[l]
            a = self.activations[l+1]
    
            # derivative of activation
            if self.activation_function == 'relu':
                d_activation = (z > 0).astype(float)
            elif self.activation_function == 'sigmoid':
                d_activation = a * (1 - a)
    
            # dZ = dA * activation
            dZ = dA * d_activation
    
            # compute gradients for this layer
            dW[l] = np.dot(self.activations[l].T, dZ)
            db[l] = np.sum(dZ, axis=0, keepdims=True)
    
            # compute next dA (unless we reach first layer)
            if l > 0:
                dA = np.dot(dZ, self.weights[l].T)
    
        # update all weights and biases
        
        for i in range(len(self.weights)):
            self.weights[i] -= self.learning_rate * dW[i]
            self.biases[i]  -= self.learning_rate * db[i]


In [331]:
nn = NeuralNetwork(
    activation_function='relu',
    no_of_input_nodes=X_train.shape[1],       # input features
    no_of_hidden_nodes=[8, 8],                # two hidden layers, each layer has 8 neurons
    no_of_output_nodes=len(np.unique(y_train)),
    n_epochs=1000,
    activation_output='softmax',
    learning_rate=0.001
)

In [332]:
for epoch in range(nn.n_epochs):
    # forward
    y_pred = nn.forward_pass(X_train)

    # loss
    loss = nn.compute_loss(y_pred, y_train)

    # backward
    nn.backward(X_train, y_train, y_pred)

    if (epoch + 1) % 50 == 0:
        print(f"Epoch {epoch+1}/{nn.n_epochs}, loss = {loss:.4f}")


Epoch 50/1000, loss = 1.4626
Epoch 100/1000, loss = 0.7816
Epoch 150/1000, loss = 0.6246
Epoch 200/1000, loss = 0.5436
Epoch 250/1000, loss = 0.4913
Epoch 300/1000, loss = 0.4536
Epoch 350/1000, loss = 0.4243
Epoch 400/1000, loss = 0.3999
Epoch 450/1000, loss = 0.3804
Epoch 500/1000, loss = 0.3634
Epoch 550/1000, loss = 0.3486
Epoch 600/1000, loss = 0.3355
Epoch 650/1000, loss = 0.3237
Epoch 700/1000, loss = 0.3130
Epoch 750/1000, loss = 0.3030
Epoch 800/1000, loss = 0.2937
Epoch 850/1000, loss = 0.2852
Epoch 900/1000, loss = 0.2775
Epoch 950/1000, loss = 0.2703
Epoch 1000/1000, loss = 0.2633


In [333]:
# train accuracy
y_train_pred = np.argmax(nn.forward_pass(X_train), axis=1)
train_acc = np.mean(y_train_pred == y_train)
print(f"Train accuracy: {train_acc * 100:.2f}%")

# test accuracy
y_test_pred = np.argmax(nn.forward_pass(X_test), axis=1)
test_acc = np.mean(y_test_pred == y_test)
print(f"Test accuracy: {test_acc * 100:.2f}%")



Train accuracy: 97.50%
Test accuracy: 93.33%
