# Vanilla Neural Networks

**Task:** Build a vanilla neural network that predicts whether four pixels form a stairs pattern, given the color value (0 to 255) of each pixel.

**Steps:** Establish forward and backpropagation, and test results for different activation functions.

**Details:-**
- Activation functions used:
    - Sigmoid
    - ReLU
    - Softplus
    - Tanh
- Input : 4 pixel color values
- Hidden Layer: 1 hidden layer with 2 neurons- left and right
- Output Layer: 1 output
- Loss function: mean squared error (reported per epoch), minimized with gradient descent
- Parameters: activation function, network structure (number of inputs and weights + number of hidden layers), learning rate and number of epochs

## Modules

In [1]:
import math
import pprint
from sklearn.metrics import f1_score, accuracy_score

### Activations

In [2]:
def sigmoid_activation(z):
    """Return the logistic sigmoid 1 / (1 + e^-z) of a scalar.

    If ``math.exp(-z)`` raises ``OverflowError`` the exponential is replaced
    by its limit: +inf for negative ``z`` (result 0.0) and 0 otherwise
    (result 1.0).
    """
    try:
        decay = math.exp(-z)
    except OverflowError:
        # Saturate instead of propagating the overflow.
        decay = math.inf if z < 0 else 0.0
    return 1 / (1 + decay)

In [3]:
def relu(z):
    """Return ``z`` when it is positive, otherwise 0 (rectified linear unit)."""
    return z if z > 0 else 0

In [4]:
def softplus(z):
    """Return softplus(z) = log(1 + e^z), a smooth approximation of ReLU.

    The value is computed as ``max(z, 0) + log1p(e^-|z|)``, which is
    algebraically identical but never exponentiates a positive number.
    Large positive inputs therefore return approximately ``z`` instead of
    overflowing to infinity, and large negative inputs return 0.0.
    """
    try:
        correction = math.log1p(math.exp(-abs(z)))
    except OverflowError:
        # Only reachable for integers too large to convert to float; fall
        # back to the limiting values.
        return math.inf if z > 0 else 0.0
    return max(z, 0) + correction

In [5]:
def tanh(z):
    """Return the hyperbolic tangent of a scalar.

    Delegates to ``math.tanh``, which saturates at +/-1 for large ``|z|``.
    The previous hand-written (e^z - e^-z) / (e^z + e^-z) form evaluated
    inf / inf on overflow and returned NaN.
    """
    try:
        return math.tanh(z)
    except OverflowError:
        # Only reachable for integers too large to convert to float.
        return 1.0 if z > 0 else -1.0

### Neurons

In [6]:
def neuron_sigmoid(x,w,b):
    """Evaluate one sigmoid neuron: sigmoid(sum(x_i * w_i) + b).

    Inputs and weights are paired with ``zip``, so surplus entries in the
    longer sequence are ignored.
    """
    pre_activation = 0
    for feature, weight in zip(x, w):
        pre_activation += feature * weight
    return sigmoid_activation(pre_activation + b)

In [7]:
def neuron_relu(x,w,b):
    """Evaluate one ReLU neuron: relu(sum(x_i * w_i) + b).

    Inputs and weights are paired with ``zip``, so surplus entries in the
    longer sequence are ignored.
    """
    pre_activation = 0
    for feature, weight in zip(x, w):
        pre_activation += feature * weight
    return relu(pre_activation + b)

In [8]:
def neuron_softplus(x,w,b):
    """Evaluate one softplus neuron: softplus(sum(x_i * w_i) + b).

    Inputs and weights are paired with ``zip``, so surplus entries in the
    longer sequence are ignored.
    """
    pre_activation = 0
    for feature, weight in zip(x, w):
        pre_activation += feature * weight
    return softplus(pre_activation + b)

In [9]:
def neuron_tanh(x,w,b):
    """Evaluate one tanh neuron: tanh(sum(x_i * w_i) + b).

    Inputs and weights are paired with ``zip``, so surplus entries in the
    longer sequence are ignored.
    """
    pre_activation = 0
    for feature, weight in zip(x, w):
        pre_activation += feature * weight
    return tanh(pre_activation + b)

### Predict if stairs 

In [10]:
def predict(y):
    """Threshold a network output: 1 (stairs) if ``y`` exceeds 0.5, else 0."""
    return 1 if y > 0.5 else 0

In [11]:
def stairs_sigmoid(network, x):
    """Run one sample through the sigmoid network (two hidden neurons, one output).

    Returns a ``(label, raw_output)`` pair: the class from ``predict`` and
    the output neuron's activation.
    """
    hidden = [
        neuron_sigmoid(x, network["w_1"], network["b_left"]),
        neuron_sigmoid(x, network["w_2"], network["b_right"]),
    ]
    output_weights = [network["w_left"], network["w_right"]]
    raw_output = neuron_sigmoid(hidden, output_weights, network["b"])
    return predict(raw_output), raw_output

In [12]:
def stairs_relu(network, x):
    """Run one sample through the ReLU network (two hidden neurons, one output).

    Returns a ``(label, raw_output)`` pair: the class from ``predict`` and
    the output neuron's activation.
    """
    hidden = [
        neuron_relu(x, network["w_1"], network["b_left"]),
        neuron_relu(x, network["w_2"], network["b_right"]),
    ]
    output_weights = [network["w_left"], network["w_right"]]
    raw_output = neuron_relu(hidden, output_weights, network["b"])
    return predict(raw_output), raw_output

In [13]:
def stairs_softplus(network, x):
    """Run one sample through the softplus network (two hidden neurons, one output).

    Returns a ``(label, raw_output)`` pair: the class from ``predict`` and
    the output neuron's activation.
    """
    hidden = [
        neuron_softplus(x, network["w_1"], network["b_left"]),
        neuron_softplus(x, network["w_2"], network["b_right"]),
    ]
    output_weights = [network["w_left"], network["w_right"]]
    raw_output = neuron_softplus(hidden, output_weights, network["b"])
    return predict(raw_output), raw_output

In [14]:
def stairs_tanh(network, x):
    """Run one sample through the tanh network (two hidden neurons, one output).

    Returns a ``(label, raw_output)`` pair: the class from ``predict`` and
    the output neuron's activation.
    """
    hidden = [
        neuron_tanh(x, network["w_1"], network["b_left"]),
        neuron_tanh(x, network["w_2"], network["b_right"]),
    ]
    output_weights = [network["w_left"], network["w_right"]]
    raw_output = neuron_tanh(hidden, output_weights, network["b"])
    return predict(raw_output), raw_output

### Preprocess input

In [15]:
def preprocess(data):
    """Parse comma-separated rows into a dict of samples keyed by column 0.

    Each row is split on commas: column 0 is the sample key, columns 1-4 are
    parsed as the integer inputs ("x") and the last column as the integer
    label ("y").

    Blank or whitespace-only rows are skipped. Splitting a file's text on
    newlines leaves an empty final entry when the file ends with a newline,
    and that entry used to crash the integer parsing with ``ValueError``.

    Returns:
        dict mapping each key to ``{"y": int, "x": [int, int, int, int]}``.

    Raises:
        IndexError: if a non-blank row has fewer than five columns.
        ValueError: if a pixel or label column is not an integer.
    """
    preprocessed_data = {}
    for row in data:
        if not row.strip():
            continue
        fields = row.split(",")
        preprocessed_data[fields[0]] = {
            "y": int(fields[-1]),  # label
            "x": [int(fields[column]) for column in range(1, 5)],
        }
    return preprocessed_data

### Feed Forward 

In [16]:
def forward_propagate(network, data, activation):
    """Run one sample through the network and record the result on the sample.

    Args:
        network: dict of weights and biases read by the ``stairs_*`` functions.
        data: sample dict holding the input vector under "x"; updated in place.
        activation: one of "sigmoid", "relu", "softplus" or "tanh".

    Returns:
        The same ``data`` dict with "y-exp" set to the thresholded prediction
        and "neuron-op" set to the output neuron's raw activation.

    Raises:
        ValueError: if ``activation`` is not one of the supported names
            (previously this surfaced as an UnboundLocalError).
    """
    if activation == "sigmoid":
        y_exp, val = stairs_sigmoid(network, data["x"])
    elif activation == "relu":
        y_exp, val = stairs_relu(network, data["x"])
    elif activation == "softplus":
        y_exp, val = stairs_softplus(network, data["x"])
    elif activation == "tanh":
        y_exp, val = stairs_tanh(network, data["x"])
    else:
        raise ValueError("unsupported activation: %r" % (activation,))
    data["y-exp"] = y_exp
    data["neuron-op"] = val
    return data

### Derivatives of Activation functions for backpropagation

In [17]:
def derivative_sigmoid(z):
    """Return the sigmoid slope expressed via the sigmoid's output: z * (1 - z).

    Callers in this file pass the neuron's activation, not its pre-activation.
    """
    complement = 1 - z
    return z * complement

In [18]:
def derivative_relu(z):
    """Return the ReLU slope: 1 for positive ``z``, otherwise 0.

    The derivative is undefined at exactly 0; 0 is used there for simplicity.
    """
    return 1 if z > 0 else 0

In [19]:
def derivative_softplus(z):
    """Return the softplus slope expressed via the softplus output ``z``.

    Mathematically this is (e^z - 1) / e^z = 1 - e^-z. It is evaluated as
    ``-expm1(-z)`` so that large ``z`` returns 1.0 instead of raising
    ``OverflowError`` from ``math.exp(z)``, and small ``z`` keeps full
    precision. Callers in this file pass the neuron's activation.
    """
    return -math.expm1(-z)

In [20]:
def derivative_tanh(z):
    """Return the tanh slope expressed via the tanh output: 1 - z^2.

    Callers in this file pass the neuron's activation, not its pre-activation.
    """
    squared = math.pow(z, 2)
    return 1 - squared

### Backpropagation errors

In [21]:
def backprob_error_sigmoid(data):
    """Attach the sigmoid network's output error term to a sample.

    The term is (label "y" - prediction "y-exp") scaled by the sigmoid
    derivative at the stored output "neuron-op". It is written to
    ``data["backprob-error"]`` and the same dict is returned.
    """
    residual = data["y"] - data["y-exp"]
    slope = derivative_sigmoid(data["neuron-op"])
    data["backprob-error"] = residual * slope
    return data

In [22]:
def backprob_error_relu(data):
    """Attach the ReLU network's output error term to a sample.

    The term is (label "y" - prediction "y-exp") scaled by the ReLU
    derivative at the stored output "neuron-op". It is written to
    ``data["backprob-error"]`` and the same dict is returned.
    """
    residual = data["y"] - data["y-exp"]
    slope = derivative_relu(data["neuron-op"])
    data["backprob-error"] = residual * slope
    return data

In [23]:
def backprob_error_softplus(data):
    """Attach the softplus network's output error term to a sample.

    The term is (label "y" - prediction "y-exp") scaled by the softplus
    derivative at the stored output "neuron-op". It is written to
    ``data["backprob-error"]`` and the same dict is returned.
    """
    residual = data["y"] - data["y-exp"]
    slope = derivative_softplus(data["neuron-op"])
    data["backprob-error"] = residual * slope
    return data

In [24]:
def backprob_error_tanh(data):
    """Attach the tanh network's output error term to a sample.

    The term is (label "y" - prediction "y-exp") scaled by the tanh
    derivative at the stored output "neuron-op". It is written to
    ``data["backprob-error"]`` and the same dict is returned.
    """
    residual = data["y"] - data["y-exp"]
    slope = derivative_tanh(data["neuron-op"])
    data["backprob-error"] = residual * slope
    return data

### Back propagation

In [25]:
def back_propagate(data, activation):
    """Compute the output error term for one sample.

    Dispatches to the ``backprob_error_*`` function matching ``activation``,
    which stores the term under ``data["backprob-error"]``.

    Args:
        data: sample dict that already holds "y", "y-exp" and "neuron-op".
        activation: one of "sigmoid", "relu", "softplus" or "tanh".

    Returns:
        The sample dict returned by the selected error function.

    Raises:
        ValueError: if ``activation`` is not one of the supported names
            (previously this surfaced as an UnboundLocalError).
    """
    if activation == "sigmoid":
        return backprob_error_sigmoid(data)
    if activation == "relu":
        return backprob_error_relu(data)
    if activation == "softplus":
        return backprob_error_softplus(data)
    if activation == "tanh":
        return backprob_error_tanh(data)
    raise ValueError("unsupported activation: %r" % (activation,))

### Update weights 

In [26]:
def update_weights(weights,l_rate,data, activation):
    """Return updated copies of a hidden neuron's weights for one sample.

    Each weight ``w`` paired with input ``x`` becomes
    ``w + w * error * derivative * l_rate * x``, where ``error`` is
    ``data["backprob-error"]`` and ``derivative`` is the activation
    derivative evaluated at ``data["neuron-op"]``.

    Args:
        weights: current weights, paired positionally with ``data["x"]``.
        l_rate: learning rate.
        data: sample dict holding "x", "neuron-op" and "backprob-error".
        activation: one of "sigmoid", "relu", "softplus" or "tanh".

    Returns:
        A new list of weights; the input list is not modified.

    Raises:
        ValueError: if ``activation`` is not one of the supported names
            (previously this surfaced as an UnboundLocalError).
    """
    if activation == "sigmoid":
        derivative = derivative_sigmoid(data["neuron-op"])
    elif activation == "relu":
        derivative = derivative_relu(data["neuron-op"])
    elif activation == "softplus":
        derivative = derivative_softplus(data["neuron-op"])
    elif activation == "tanh":
        derivative = derivative_tanh(data["neuron-op"])
    else:
        raise ValueError("unsupported activation: %r" % (activation,))

    # NOTE(review): the step is proportional to the current weight, and the
    # output derivative is applied here as well as inside "backprob-error".
    # Confirm this is the intended learning rule. Evaluation order is kept
    # exactly as before so trained weights do not change.
    error = data["backprob-error"]
    return [
        weight + weight * error * derivative * l_rate * x
        for weight, x in zip(weights, data["x"])
    ]

### Train Network

In [39]:
def train(network, data, n_epochs, l_rate, activation):
    """Train the hidden-layer weights in place, one sample at a time.

    Samples are visited in reverse key order on every epoch. For each sample
    the forward pass and error term are computed, then ``network["w_1"]`` and
    ``network["w_2"]`` are replaced by ``update_weights``. After each epoch
    the mean squared difference between "y-exp" and "y" is printed.
    """
    sample_keys = list(data)[::-1]
    sample_count = len(sample_keys)
    for epoch in range(n_epochs):
        squared_error_total = 0
        for key in sample_keys:
            data[key] = forward_propagate(network, data[key], activation)
            sample = data[key] = back_propagate(data[key], activation)
            squared_error_total += math.pow(sample["y-exp"] - sample["y"], 2)
            # Left hidden neuron first, then right, each using the same sample.
            for layer in ("w_1", "w_2"):
                network[layer] = update_weights(network[layer], l_rate, sample, activation)
        squared_error_total /= sample_count
        print("epoch = %d mean squared error = %0.5f learning rate = %0.10f"%(epoch, squared_error_total, l_rate))

### Test Data

In [28]:
def test(network, data, activation):
    """Run the forward pass on every sample and return the updated dict.

    Each entry of ``data`` is replaced by the result of ``forward_propagate``,
    which adds the prediction fields to the sample.
    """
    for key, sample in data.items():
        data[key] = forward_propagate(network, sample, activation)
    return data

### Extract and Evaluate test results

In [29]:
def extract(data):
    """Split evaluated samples into parallel lists of labels and predictions.

    Returns ``(y_true, y_out)`` where ``y_true`` collects each sample's "y"
    and ``y_out`` each sample's "y-exp", in the dict's iteration order.
    """
    samples = list(data.values())
    y_true = [sample["y"] for sample in samples]
    y_out = [sample["y-exp"] for sample in samples]
    return y_true, y_out

In [30]:
def evaluate(y_true, y_out):
    """Score predictions against labels.

    Returns ``(macro_f1, accuracy)`` as computed by scikit-learn's
    ``f1_score`` (with ``average='macro'``) and ``accuracy_score``.
    """
    macro_f1 = f1_score(y_true, y_out, average='macro')
    accuracy = accuracy_score(y_pred=y_out, y_true=y_true)
    return macro_f1, accuracy

## Process

In [31]:
# Load the training rows, dropping the first (header) line. The context
# manager closes the file handle, which the bare open() call never did.
with open("train.csv") as train_file:
    traindata = train_file.read().split("\n")[1:]

In [32]:
# Load the test rows, dropping the first (header) line. The context manager
# closes the file handle, which the bare open() call never did.
with open("test.csv") as test_file:
    testdata = test_file.read().split("\n")[1:]

# One independent parsed copy per activation: evaluating a network writes
# its predictions back into the sample dicts.
test_sig = preprocess(testdata)
test_rel = preprocess(testdata)
test_sof = preprocess(testdata)
test_tan = preprocess(testdata)

In [33]:
# Fixed starting point for the network: two hidden neurons (left/right)
# feeding one output neuron. "w_1"/"w_2" are the hidden-layer input weights,
# "w_left"/"w_right" the output-layer weights, and the "b*" entries biases.
network = {
    "w_1": [0.002, -0.050, 0.012, 0.012],
    "w_2": [-0.05, 0.002, 0.012, 0.012],
    "w_left": 3,
    "w_right": 3,
    "b_left": -0.5,
    "b_right": -0.5,
    "b": -1,
}
# print("Network weights:")
# pprint.pprint(network)

### Sigmoid Network

In [47]:
# Sigmoid experiment: reset the hidden-layer weights, train, then score on
# the test set.
preprocessed_sig = preprocess(traindata)
network.update(
    w_1=[0.002, -0.050, 0.012, 0.012],
    w_2=[-0.05, 0.002, 0.012, 0.012],
)
train(network, preprocessed_sig, n_epochs=25, l_rate=0.000025511, activation="sigmoid")
result = test(network, test_sig, activation="sigmoid")
y_true, y_out = extract(result)
f1, acc = evaluate(y_true, y_out)
print("F1 Score: %0.5f\tAccuracy: %0.3f" % (f1, acc))

epoch = 0 mean squared error = 0.22250 learning rate = 0.0000255110
epoch = 1 mean squared error = 0.22250 learning rate = 0.0000255110
epoch = 2 mean squared error = 0.22250 learning rate = 0.0000255110
epoch = 3 mean squared error = 0.22250 learning rate = 0.0000255110
epoch = 4 mean squared error = 0.22250 learning rate = 0.0000255110
epoch = 5 mean squared error = 0.22250 learning rate = 0.0000255110
epoch = 6 mean squared error = 0.22250 learning rate = 0.0000255110
epoch = 7 mean squared error = 0.22250 learning rate = 0.0000255110
epoch = 8 mean squared error = 0.22000 learning rate = 0.0000255110
epoch = 9 mean squared error = 0.22000 learning rate = 0.0000255110
epoch = 10 mean squared error = 0.22000 learning rate = 0.0000255110
epoch = 11 mean squared error = 0.22000 learning rate = 0.0000255110
epoch = 12 mean squared error = 0.22000 learning rate = 0.0000255110
epoch = 13 mean squared error = 0.22000 learning rate = 0.0000255110
epoch = 14 mean squared error = 0.22000 lear

### ReLU Network

In [35]:
# ReLU experiment: reset the hidden-layer weights, train, then score on the
# test set.
preprocessed_relu = preprocess(traindata)
network.update(
    w_1=[0.002, -0.050, 0.012, 0.012],
    w_2=[-0.05, 0.002, 0.012, 0.012],
)
train(network, preprocessed_relu, n_epochs=5, l_rate=0.002, activation="relu")
result = test(network, test_rel, activation="relu")
y_true, y_out = extract(result)
f1, acc = evaluate(y_true, y_out)
print("F1 Score: %0.5f\tAccuracy: %0.3f" % (f1, acc))

epoch = 0 mean squared error = 0.08500 learning rate = 0.002000
epoch = 1 mean squared error = 0.07250 learning rate = 0.002000
epoch = 2 mean squared error = 0.09750 learning rate = 0.002000
epoch = 3 mean squared error = 0.04750 learning rate = 0.002000
epoch = 4 mean squared error = 0.08500 learning rate = 0.002000
F1 Score: 0.98997	Accuracy: 0.990


### Softplus Network

In [36]:
# Softplus experiment: reset the hidden-layer weights, train, then score on
# the test set.
preprocessed_soft = preprocess(traindata)
network.update(
    w_1=[0.002, -0.050, 0.012, 0.012],
    w_2=[-0.05, 0.002, 0.012, 0.012],
)
train(network, preprocessed_soft, n_epochs=10, l_rate=0.00000155, activation="softplus")
result = test(network, test_sof, activation="softplus")
y_true, y_out = extract(result)
f1, acc = evaluate(y_true, y_out)
print("F1 Score: %0.5f\tAccuracy: %0.3f" % (f1, acc))

epoch = 0 mean squared error = 0.26750 learning rate = 0.000002
epoch = 1 mean squared error = 0.26750 learning rate = 0.000002
epoch = 2 mean squared error = 0.26750 learning rate = 0.000002
epoch = 3 mean squared error = 0.26750 learning rate = 0.000002
epoch = 4 mean squared error = 0.26750 learning rate = 0.000002
epoch = 5 mean squared error = 0.26750 learning rate = 0.000002
epoch = 6 mean squared error = 0.26750 learning rate = 0.000002
epoch = 7 mean squared error = 0.26750 learning rate = 0.000002
epoch = 8 mean squared error = 0.26750 learning rate = 0.000002
epoch = 9 mean squared error = 0.26750 learning rate = 0.000002
F1 Score: 0.72917	Accuracy: 0.740


### Tanh Network

In [125]:
# Tanh experiment: reset the hidden-layer weights, train, then score on the
# test set.
preprocessed_tanh = preprocess(traindata)
network.update(
    w_1=[0.002, -0.050, 0.012, 0.012],
    w_2=[-0.05, 0.002, 0.012, 0.012],
)
train(network, preprocessed_tanh, n_epochs=15, l_rate=0.00002170, activation="tanh")
result = test(network, test_tan, activation="tanh")
y_true, y_out = extract(result)
f1, acc = evaluate(y_true, y_out)
print("F1 Score: %0.5f\tAccuracy: %0.3f" % (f1, acc))

epoch = 0 mean squared error = 0.53750 learning rate = 0.0000217000
epoch = 1 mean squared error = 0.54500 learning rate = 0.0000217000
epoch = 2 mean squared error = 0.55000 learning rate = 0.0000217000
epoch = 3 mean squared error = 0.56000 learning rate = 0.0000217000
epoch = 4 mean squared error = 0.55250 learning rate = 0.0000217000
epoch = 5 mean squared error = 0.51500 learning rate = 0.0000217000
epoch = 6 mean squared error = 0.44750 learning rate = 0.0000217000
epoch = 7 mean squared error = 0.37750 learning rate = 0.0000217000
epoch = 8 mean squared error = 0.34000 learning rate = 0.0000217000
epoch = 9 mean squared error = 0.30250 learning rate = 0.0000217000
epoch = 10 mean squared error = 0.28750 learning rate = 0.0000217000
epoch = 11 mean squared error = 0.28250 learning rate = 0.0000217000
epoch = 12 mean squared error = 0.27500 learning rate = 0.0000217000
epoch = 13 mean squared error = 0.26750 learning rate = 0.0000217000
epoch = 14 mean squared error = 0.26250 lear

### References:-

- http://blog.kaggle.com/2017/11/27/introduction-to-neural-networks/
- https://machinelearningmastery.com/implement-backpropagation-algorithm-scratch-python/
- https://en.wikipedia.org/wiki/Rectifier_(neural_networks)
- https://en.wikipedia.org/wiki/Hyperbolic_function#Hyperbolic_tangent
- https://www.quora.com/What-are-the-benefits-of-using-ReLU-over-softplus-as-activation-functions
- https://theclevermachine.wordpress.com/2014/09/08/derivation-derivatives-for-common-neural-network-activation-functions/