## NN

#### Simple nn with gradient checkpoint

In [2]:
import torch
import torch.nn as nn
from torch.utils.checkpoint import checkpoint_sequential

In [3]:
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.layer1 = nn.Linear(500, 100) # (in_feature, out_feature)
        self.layer2 = nn.Linear(400, 80)
        self.layer3 = nn.Linear(300, 60)
        self.layer4 = nn.Linear(200, 40)

        def forward(self, x):
            layers = torch.nn.Sequential(
                self.layer1,
                nn.ReLU(),
                self.layer2,
                nn.ReLU(),
                self.layer3,
                nn.ReLU(),
                self.layer4
            )
            x = checkpoint_sequential(layers, segments=2, input=x)
            return x

In [4]:
g_model = SimpleNN()
g_model

SimpleNN(
  (layer1): Linear(in_features=500, out_features=100, bias=True)
  (layer2): Linear(in_features=400, out_features=80, bias=True)
  (layer3): Linear(in_features=300, out_features=60, bias=True)
  (layer4): Linear(in_features=200, out_features=40, bias=True)
)

#### Dropout

- To zeroed out neurons randomly to regularize the nn during **training**
- Uses bernoulli distribution $1/(1-p)$ -- p is dropout probability
- It scales remaining active inputs

In [14]:
droupout_layer = nn.Dropout(0.4) # p = 0.4

In [15]:
# training
droupout_layer.train()

Dropout(p=0.4, inplace=False)

In [16]:
# sample input
d = torch.rand((1,6))
d

tensor([[0.5444, 0.4021, 0.1602, 0.3788, 0.5806, 0.2136]])

In [17]:
droupout_layer(d)

tensor([[0.9073, 0.6702, 0.2670, 0.6314, 0.9676, 0.0000]])

In [18]:
p = 0.4
print(d/(1-p))

tensor([[0.9073, 0.6702, 0.2670, 0.6314, 0.9676, 0.3560]])


- Here last value zeroed out (0.2136 -> 0.00) while others incresed as they scaled out :: same as d/(1-p)

In [19]:
# eval
droupout_layer.eval()

Dropout(p=0.4, inplace=False)

In [20]:
d

tensor([[0.5444, 0.4021, 0.1602, 0.3788, 0.5806, 0.2136]])

In [22]:
droupout_layer(d) #  No change in value

tensor([[0.5444, 0.4021, 0.1602, 0.3788, 0.5806, 0.2136]])

> Dropuout in NN
  <pre>
      For p = 0.4 (40% probability)
      training: 40% neurons zeroed out so got 60% input
      evaluation: it got total 100% inputs
      So by scaling out other active inputs it basically brings the desired range during training 
  </pre>

### Learning NN example

Learning XOR operation in nn

In [1]:
import numpy as np

In [2]:
np.random.seed(3)                         # set seed to make repeatable
lr = 0.1
index_list = [0, 1, 2, 3]                 # to randomize order

x_train = [np.array([1.0, -1.0, -1.0]),   # input
           np.array([1.0, -1.0, 1.0]),
           np.array([1.0, 1.0, -1.0]),
           np.array([1.0, 1.0, 1.0])]
y_train = [0.0, 1.0, 1.0, 0.0]             # output

In [3]:
# track neurons weights
def neuron_w(input_count):
    weights = np.zeros(input_count+1)
    for i in range(1, (input_count+1)):
        weights[i] = np.random.uniform(-1.0, 1.0)
    return weights

In [4]:
n_w = [neuron_w(2), neuron_w(2), neuron_w(2)]
n_y = [0, 0, 0]
n_error = [0, 0, 0]

In [5]:
n_w

[array([0.        , 0.10159581, 0.41629565]),
 array([ 0.        , -0.41819052,  0.02165521]),
 array([0.        , 0.78589391, 0.79258618])]

In [12]:
def show_learning():
    print('Current weights:')
    for i, w in enumerate(n_w):
        print(f'neuron = {i}, w0 = {w[0]:.4f}, w1 = {w[1]:.4f}, w2 = {w[2]:.4f}')
    print('-'*50)

In [8]:
def forward_pass(x):
    global n_y
    n_y[0] = np.tanh(np.dot(n_w[0], x))         # neuron 0
    n_y[1] = np.tanh(np.dot(n_w[1], x))         # neuron 1
    n2_inputs = np.array([1.0, n_y[0], n_y[1]]) # 1.0 is bias
    z2 = np.dot(n_w[2], n2_inputs)
    n_y[2] = 1.0 / (1.0 + np.exp(-z2))

In [9]:
def backward_pass(y_truth):
    global n_error
    error_prime = -(y_truth - n_y[2]) 
    derivative = n_y[2] * (1.0 - n_y[2]) 
    n_error[2] = error_prime * derivative
    derivative = 1.0 - n_y[0]**2 
    n_error[0] = n_w[2][1] * n_error[2] * derivative
    derivative = 1.0 - n_y[1]**2 
    n_error[1] = n_w[2][2] * n_error[2] * derivative

In [10]:
def adjust_weights(x):
    global n_w
    n_w[0] -= (x * lr * n_error[0])
    n_w[1] -= (x * lr * n_error[1])
    n2_inputs = np.array([1.0, n_y[0], n_y[1]])
    n_w[2] -= (n2_inputs * lr * n_error[2])

In [13]:
# train
all_correct = False
while not all_correct: 
    all_correct = True
    np.random.shuffle(index_list) 
    
for i in index_list: 
    forward_pass(x_train[i])
    backward_pass(y_train[i])
    adjust_weights(x_train[i])
    show_learning() 
    
for i in range(len(x_train)): 
    forward_pass(x_train[i])
    print(f'x1 = {x_train[i][1]}, x2 = {x_train[i][2]}, y ={n_y[2]}')
    if(((y_train[i] < 0.5) and (n_y[2] >= 0.5)) or ((y_train[i] >= 0.5) and (n_y[2] < 0.5))):
        all_correct = False

Current weights:
neuron = 0, w0 = 0.0075, w1 = 0.0994, w2 = 0.4177
neuron = 1, w0 = 0.0040, w1 = -0.4207, w2 = 0.0226
neuron = 2, w0 = 0.0066, w1 = 0.7857, w2 = 0.7934
--------------------------------------------------
Current weights:
neuron = 0, w0 = 0.0181, w1 = 0.1099, w2 = 0.4072
neuron = 1, w0 = 0.0137, w1 = -0.4110, w2 = 0.0129
neuron = 2, w0 = 0.0214, w1 = 0.7812, w2 = 0.7873
--------------------------------------------------
Current weights:
neuron = 0, w0 = 0.0102, w1 = 0.1021, w2 = 0.3994
neuron = 1, w0 = 0.0047, w1 = -0.4200, w2 = 0.0040
neuron = 2, w0 = 0.0082, w1 = 0.7748, w2 = 0.7921
--------------------------------------------------
Current weights:
neuron = 0, w0 = 0.0027, w1 = 0.1096, w2 = 0.4069
neuron = 1, w0 = -0.0035, w1 = -0.4118, w2 = 0.0122
neuron = 2, w0 = -0.0041, w1 = 0.7804, w2 = 0.7872
--------------------------------------------------
x1 = -1.0, x2 = -1.0, y =0.4808216978192937
x1 = -1.0, x2 = 1.0, y =0.6308757817853017
x1 = 1.0, x2 = -1.0, y =0.367018628