# Vanilla Neural Network

In [1]:
import math
import pprint

## Activation Functions

#### Sigmoid

In [2]:
def sigmoid_activation(z):
    return 1/(1+math.exp(-z))

#### ReLU

Rectified linear units = max(0,x)

Different types:-
- Simple ReLU : max(0,x)
- Leaky ReLU : 
  - f(x) = x x>0
  - f(x) = 0.01x otherwise
- Noisy ReLU : max (0, x+Y) where Y ~ N(0,sigma(x)) [N: Guassian Noise] 
- Parametric ReLU : 
  - [a>1]
  
     f(x) = x x>0
     
     f(x) = ax otherwise
     
  - [a<=1]
  
     max(x,ax)  

In [3]:
def relu(z):
    return max(0,z)

#### Softplus

In [4]:
def softplus(z):
    # softplus function = log(1+exp(x)) -> smooth approximation of relu that can be differentiated
    return math.log(1+math.exp(z))

#### Tanh (Hyperbolic function)

In [5]:
def tanh(z):
    return (math.exp(z)-math.exp(-z))/(math.exp(z)+math.exp(-z))

## Neurons

In [6]:
def neuron_sigmoid(x,w,b):
    z=0
    for x_,w_ in zip(x,w):
        z+=x_*w_
    z+=b
    y = sigmoid_activation(z)
    return y

In [7]:
def neuron_relu(x,w,b):
    z=0
    for x_,w_ in zip(x,w):
        z+=x_*w_
    z+=b
    y = relu(z)
    return y

In [8]:
def neuron_softplus(x,w,b):
    z=0
    for x_,w_ in zip(x,w):
        z+=x_*w_
    z+=b
    y = softplus(z)
    return y

In [9]:
def neuron_tanh(x,w,b):
    z=0
    for x_,w_ in zip(x,w):
        z+=x_*w_
    z+=b
    y = tanh(z)
    return y

## Prediction

In [10]:
def predict(y):
    if y>0.5:
        # print("Stairs")
        return 1
    else:
        # print("Not Stairs")
        return 0

In [11]:
def stairs_sigmoid(x):
    y_left = neuron_sigmoid(x, w_1, b_left)
    y_right = neuron_sigmoid(x, w_2, b_right)
    y_exp = neuron_sigmoid([y_left, y_right], [w_left, w_right], b)
    return predict(y_exp),y_exp

In [12]:
def stairs_relu(x):
    y_left = neuron_relu(x, w_1, b_left)
    y_right = neuron_relu(x, w_2, b_right)
    y_exp = neuron_relu([y_left, y_right], [w_left, w_right], b)
    return predict(y_exp),y_exp

In [13]:
def stairs_softplus(x):
    y_left = neuron_softplus(x, w_1, b_left)
    y_right = neuron_softplus(x, w_2, b_right)
    y_exp = neuron_softplus([y_left, y_right], [w_left, w_right], b)
    return predict(y_exp),y_exp

In [14]:
def stairs_tanh(x):
    y_left = neuron_tanh(x, w_1, b_left)
    y_right = neuron_tanh(x, w_2, b_right)
    y_exp = neuron_tanh([y_left, y_right], [w_left, w_right], b)
    return predict(y_exp),y_exp

## Solution

In [15]:
w_1 = [0.002, -0.050, 0.012, 0.012]
w_2 = [-0.05, 0.002, 0.012, 0.012]
w_left = 3
w_right = 3
b_left = -0.5
b_right = -0.5
b = -1

In [16]:
x1 = [115, 130, 80, 88]
x2 = [47, 250, 8, 88]
x3 = [182, 5, 157, 155]

In [17]:
prediction,val = stairs_sigmoid(x1)
print(prediction)
prediction,val = stairs_relu(x1)
print(prediction)
prediction,val = stairs_softplus(x1)
print(prediction)
prediction,val = stairs_tanh(x1)
print(prediction)

0
0
0
0


In [18]:
prediction,val = stairs_sigmoid(x2)
print(prediction)
prediction,val = stairs_relu(x2)
print(prediction)
prediction,val = stairs_softplus(x2)
print(prediction)
prediction,val = stairs_tanh(x2)
print(prediction)

0
0
1
0


In [19]:
prediction,val = stairs_sigmoid(x3)
print(prediction)
prediction,val = stairs_relu(x3)
print(prediction)
prediction,val = stairs_softplus(x3)
print(prediction)
prediction,val = stairs_tanh(x3)
print(prediction)

1
1
1
0


# Vanilla Neural Network to fit training data

In [20]:
traindata = open("train.csv").read().split("\n")[1:] 

In [21]:
def preprocess(data):
    preprocessed_data = dict()
    for d in data:
        line = d.split(",")
        preprocessed_data[line[0]] = dict()
        preprocessed_data[line[0]]["y"] = int(line[-1])# label
        preprocessed_data[line[0]]["x"] = [int(line[1]),int(line[2]),int(line[3]),int(line[4])]
    return preprocessed_data

In [22]:
preprocessed = preprocess(traindata[:5])

In [23]:
# initial assumption - random weights and bias
w_1 = [0.002, -0.050, 0.012, 0.012]
w_2 = [-0.05, 0.002, 0.012, 0.012]
w_left = 3
w_right = 3
b_left = -0.5
b_right = -0.5
b = -1

In [24]:
def fit():
    for i in preprocessed:
        #print(i)
        y_exp, val = stairs_sigmoid(preprocessed[i]["x"])
        preprocessed[i]["y-exp"] = y_exp
        preprocessed[i]["neuron-op"] = val
    pprint.pprint(preprocessed)

In [25]:
# loss function Mean Squared Error
def mse_predictor(data):
    mse_sum = 0
    n = len(data)
    for item in data:
        mse_sum += math.pow((data[item]["y-exp"]-data[item]["y"]),2) # observed - true 
    print (mse_sum/n)   

In [26]:
fit()
mse_predictor(preprocessed)

{'1': {'neuron-op': 0.8735069706949717,
       'x': [252, 4, 155, 175],
       'y': 1,
       'y-exp': 1},
 '2': {'neuron-op': 0.8781464903610091,
       'x': [175, 10, 186, 200],
       'y': 1,
       'y-exp': 1},
 '3': {'neuron-op': 0.5920575066543718,
       'x': [82, 131, 230, 100],
       'y': 0,
       'y-exp': 1},
 '4': {'neuron-op': 0.2836270003389756,
       'x': [115, 138, 80, 88],
       'y': 0,
       'y-exp': 0},
 '5': {'neuron-op': 0.9885624404114893,
       'x': [27, 60, 194, 238],
       'y': 0,
       'y-exp': 1}}
0.4


In [27]:
# to observe change in weights and it's effect on mse
w_1 = [0.002, -0.050, 0.012, 0.012] 
w_2 = [-0.05, -1, 0.05, 0.012] # change 2nd weight
w_left = 3
w_right = 3

In [28]:
fit()
mse_predictor(preprocessed)

{'1': {'neuron-op': 0.8737062310734811,
       'x': [252, 4, 155, 175],
       'y': 1,
       'y-exp': 1},
 '2': {'neuron-op': 0.8750816380298204,
       'x': [175, 10, 186, 200],
       'y': 1,
       'y-exp': 1},
 '3': {'neuron-op': 0.29998893813188676,
       'x': [82, 131, 230, 100],
       'y': 0,
       'y-exp': 0},
 '4': {'neuron-op': 0.2723423503744277,
       'x': [115, 138, 80, 88],
       'y': 0,
       'y-exp': 0},
 '5': {'neuron-op': 0.8251012037127704,
       'x': [27, 60, 194, 238],
       'y': 0,
       'y-exp': 1}}
0.2


In [29]:
w_1 = [0.002, -1, 0.012, 0.012] # change 2nd weight
w_2 = [-0.05, 0.002, 0.05, 0.012] 
w_left = 3
w_right = 3

In [30]:
fit()
mse_predictor(preprocessed)

{'1': {'neuron-op': 0.6424121898483348,
       'x': [252, 4, 155, 175],
       'y': 1,
       'y-exp': 1},
 '2': {'neuron-op': 0.8554532866768318,
       'x': [175, 10, 186, 200],
       'y': 1,
       'y-exp': 1},
 '3': {'neuron-op': 0.8807235033641981,
       'x': [82, 131, 230, 100],
       'y': 0,
       'y-exp': 1},
 '4': {'neuron-op': 0.4640861418704272,
       'x': [115, 138, 80, 88],
       'y': 0,
       'y-exp': 0},
 '5': {'neuron-op': 0.8807908174177795,
       'x': [27, 60, 194, 238],
       'y': 0,
       'y-exp': 1}}
0.4


### Derivatives of Activation functions

In [31]:
def derivative_sigmoid(z):
    return z*(1-z)

In [32]:
def derivative_relu(z):
    if z>0:
        return 1
    else:
        return 0 # ReLU is undefined at z = 0 but here it's taken 0 for simplicity

In [33]:
def derivative_softplus(z):
    return (math.exp(z) - 1)/math.exp(z)

In [34]:
def derivative_tanh(z):
    return 1 - math.pow(z,2)

### Backprobagation

In [40]:
def backprob_error_sigmoid(data):
    # print(data)
    for item in data:
        data[item]["backprob-error"] = (data[item]["y"] - data[item]["y-exp"])*derivative_sigmoid(data[item]["neuron-op"])
    return data

In [36]:
def backprob_error_relu(data):
    for item in data:
        data[item]["backprob-error"] = (data[item]["y"] - data[item]["y-exp"])*derivative_relu(data[item]["neuron-op"])
    return data

In [37]:
def backprob_error_softplus(data):
    for item in data:
        data[item]["backprob-error"] = (data[item]["y"] - data[item]["y-exp"])*derivative_softplus(data[item]["neuron-op"])
    return data

In [38]:
def backprob_error_tanh(data):
    for item in data:
        data[item]["backprob-error"] = (data[item]["y"] - data[item]["y-exp"])*derivative_tanh(data[item]["neuron-op"])
    return data

In [48]:
pprint.pprint(backprob_error_sigmoid(preprocessed))

{'1': {'backprob-error': 0.0,
       'neuron-op': 0.6424121898483348,
       'x': [252, 4, 155, 175],
       'y': 1,
       'y-exp': 1},
 '2': {'backprob-error': 0.0,
       'neuron-op': 0.8554532866768318,
       'x': [175, 10, 186, 200],
       'y': 1,
       'y-exp': 1},
 '3': {'backprob-error': -0.10504961398609142,
       'neuron-op': 0.8807235033641981,
       'x': [82, 131, 230, 100],
       'y': 0,
       'y-exp': 1},
 '4': {'backprob-error': 0.0,
       'neuron-op': 0.4640861418704272,
       'x': [115, 138, 80, 88],
       'y': 0,
       'y-exp': 0},
 '5': {'backprob-error': -0.1049983533702993,
       'neuron-op': 0.8807908174177795,
       'x': [27, 60, 194, 238],
       'y': 0,
       'y-exp': 1}}


In [53]:
def update_weights(weights,l_rate,data):
    new_weights = []
    for weight,x in zip(weights, data["x"]):
        w_error = weight*data["backprob-error"]*derivative_sigmoid(data["neuron-op"])
        w = weight+w_error*l_rate*x
        new_weights.append(w)
    return new_weights

In [None]:
def train(network, data, n_epochs, l_rate):
    items = list(reversed(list(data.keys())))
    for epoch in range(n_epochs):
        for item in items:
            

In [45]:
list(reversed(list(preprocessed.keys())))

['5', '4', '3', '2', '1']

In [55]:
#print(w_1)
update_weights(w_1, 0.5, preprocessed)

[0.002, -1, 0.012, 0.012]
5
[0.0017023343363171957, -0.6692603736857727, -0.000832697500992019, -0.0037432062125572173]
[0.0017023343363171957, -0.6692603736857727, -0.000832697500992019, -0.0037432062125572173]
4
[0.0017023343363171957, -0.6692603736857727, -0.000832697500992019, -0.0037432062125572173]
[0.0017023343363171957, -0.6692603736857727, -0.000832697500992019, -0.0037432062125572173]
3
[0.0009321092890501055, -0.18550552237463824, 0.00022405679842647688, -0.00167781331568088]
[0.0009321092890501055, -0.18550552237463824, 0.00022405679842647688, -0.00167781331568088]
2
[0.0009321092890501055, -0.18550552237463824, 0.00022405679842647688, -0.00167781331568088]
[0.0009321092890501055, -0.18550552237463824, 0.00022405679842647688, -0.00167781331568088]
1
[0.0009321092890501055, -0.18550552237463824, 0.00022405679842647688, -0.00167781331568088]


[0.0009321092890501055,
 -0.18550552237463824,
 0.00022405679842647688,
 -0.00167781331568088]