## Classification (b)

In [31]:
from neuralnet_inside import *
from random import shuffle
import numpy as np

# Load the configuration.
config = load_config("./data")

X_train, y_train, X_stats = load_data(path="./data", stats=None, mode="train")

# Pick 10 examples, one per category: for every target column k, take the
# first training row whose entry in that column equals 1.
class_masks = [y_train[:, k] == 1 for k in range(10)]
X_sub = [X_train[mask][0] for mask in class_masks]
y_sub = [y_train[mask][0] for mask in class_masks]
print(len(X_sub), len(y_sub))
X_sub, y_sub = np.array(X_sub), np.array(y_sub)

# Model parameters used for the gradient check in part (b).
config_prob_b = {
    'layer_specs': [3072, 64, 64, 10],
    'activation': 'tanh',
    'learning_rate': 0.01,
    'batch_size': 128,
    'epochs': 100,
    'early_stop': True,
    'early_stop_epoch': 5,
    'L2_penalty': 0,
    'momentum': True,
    'momentum_gamma': 0.9,
}



(50000, 32, 32, 3)
inp: (50000, 32, 32, 3)
10 10


In [35]:
# Gradient check: compare the backprop d_b / d_w values with numerical estimates.

## Part (b): numerical estimation of the bias and weight gradients
def Num_Est_b(model, layer, eps, output_idx, inputs=None, targets=None):
    """Numerically estimate dE/db for one bias of ``layer`` (central difference).

    The bias ``layer.b[0][output_idx]`` is set to ``b + eps`` and then to
    ``b - eps``; the loss is re-evaluated with ``model.forward`` each time.
    The bias is always put back to its exact original value, even if the
    forward pass raises.

    Args:
        model: Object whose ``forward(inputs, targets)`` returns a sequence
            with the loss at index 1.
        layer: Layer whose ``b`` is indexable as ``b[0][output_idx]``.
        eps: Size of the perturbation.
        output_idx: Index of the bias to perturb.
        inputs: Inputs for the forward pass. Defaults to the notebook-level
            ``X_sub``.
        targets: Targets for the forward pass. Defaults to the notebook-level
            ``y_sub``.

    Returns:
        ``(E(b + eps) - E(b - eps)) / (2 * eps) * 10``.
    """
    if inputs is None:
        inputs = X_sub
    if targets is None:
        targets = y_sub

    # Remember the exact value: `+= eps; -= 2*eps; += eps` is not guaranteed
    # to round-trip in floating point, so the model could drift between checks.
    original = layer.b[0][output_idx]
    try:
        layer.b[0][output_idx] = original + eps  # E(b + eps)
        loss_plus = model.forward(inputs, targets)[1]
        layer.b[0][output_idx] = original - eps  # E(b - eps)
        loss_minus = model.forward(inputs, targets)[1]
    finally:
        layer.b[0][output_idx] = original  # back to normal

    # NOTE(review): the factor 10 presumably rescales a loss that is averaged
    # over the 10 examples in X_sub -- confirm against the loss definition.
    return (loss_plus - loss_minus) / (2 * eps) * 10

def Num_Est_w(model, layer, eps, input_idx, output_idx, inputs=None, targets=None):
    """Numerically estimate dE/dw for one weight of ``layer`` (central difference).

    The weight ``layer.w[input_idx][output_idx]`` is set to ``w + eps`` and
    then to ``w - eps``; the loss is re-evaluated with ``model.forward`` each
    time. The weight is always put back to its exact original value, even if
    the forward pass raises.

    Args:
        model: Object whose ``forward(inputs, targets)`` returns a sequence
            with the loss at index 1.
        layer: Layer whose ``w`` is indexable as ``w[input_idx][output_idx]``.
        eps: Size of the perturbation.
        input_idx: First index of the weight to perturb.
        output_idx: Second index of the weight to perturb.
        inputs: Inputs for the forward pass. Defaults to the notebook-level
            ``X_sub``.
        targets: Targets for the forward pass. Defaults to the notebook-level
            ``y_sub``.

    Returns:
        ``(E(w + eps) - E(w - eps)) / (2 * eps) * 10``.
    """
    if inputs is None:
        inputs = X_sub
    if targets is None:
        targets = y_sub

    # Remember the exact value: `+= eps; -= 2*eps; += eps` is not guaranteed
    # to round-trip in floating point, so the model could drift between checks.
    original = layer.w[input_idx][output_idx]
    try:
        layer.w[input_idx][output_idx] = original + eps  # E(w + eps)
        loss_plus = model.forward(inputs, targets)[1]
        layer.w[input_idx][output_idx] = original - eps  # E(w - eps)
        loss_minus = model.forward(inputs, targets)[1]
    finally:
        layer.w[input_idx][output_idx] = original  # back to normal

    # NOTE(review): the factor 10 presumably rescales a loss that is averaged
    # over the 10 examples in X_sub -- confirm against the loss definition.
    return (loss_plus - loss_minus) / (2 * eps) * 10

# Parameters to check:
#   - 1 output bias weight
#   - 1 hidden bias weight for each hidden layer
#   - 2 hidden to output weights
#   - 2 input to hidden weights
# Goal: show that the numerical gradient is within O(eps^2) of the backprop one.
eps = 1e-6

# Accumulators (not filled in by this cell).
db, dw = [], []
db_est, dw_est = [], []
d_w_lst, d_b_lst = [], []

# One forward/backward pass on the 10-example subset populates the per-layer
# d_b / d_w values that the numerical estimates are compared against.
model = Neuralnetwork(config_prob_b)
loss_train = model.forward(X_sub, targets=y_sub)
model.backward()

for layer in model.layers:
    # Only `Layer` instances are checked; every other entry is skipped.
    if not isinstance(layer, Layer):
        continue

    # Numerical estimates first: bias 0, then weights (0, 0) and (0, 1).
    est_b = Num_Est_b(model, layer, eps, 0)
    est_w1 = Num_Est_w(model, layer, eps, 0, 0)
    est_w2 = Num_Est_w(model, layer, eps, 0, 1)

    checks = [
        ('b gradient: ', 'b diff: ', layer.d_b[0], est_b),
        ('w1 gradient: ', 'w1 diff: ', layer.d_w[0][0], est_w1),
        ('w2 gradient: ', 'w2 diff: ', layer.d_w[0][1], est_w2),
    ]
    for grad_label, _, backprop_value, estimate in checks:
        print(grad_label, backprop_value, 'numerical estimation: ', estimate)
    # The "diff" is the SUM of the two numbers: in the recorded output the
    # stored d_b / d_w and the numerical estimate carry opposite signs.
    for _, diff_label, backprop_value, estimate in checks:
        print(diff_label, backprop_value + estimate)








b gradient:  0.0 numerical estimation:  0.0
w1 gradient:  0.0 numerical estimation:  0.0
w2 gradient:  -0.024485076445083118 numerical estimation:  0.024434207990253753
b diff:  0.0
w1 diff:  0.0
w2 diff:  -5.0868454829364196e-05
b gradient:  -0.004891853728864167 numerical estimation:  0.004892077853924093
w1 gradient:  0.0048915870566938555 numerical estimation:  -0.004891810290175158
w2 gradient:  7.363770472256348e-05 numerical estimation:  -7.353673225907187e-05
b diff:  2.2412505992601417e-07
w1 diff:  -2.2323348130258436e-07
w2 diff:  1.0097246349161212e-07
b gradient:  -0.009801115683931015 numerical estimation:  0.012915655211998
w1 gradient:  0.20718969368826878 numerical estimation:  -0.20359102292921705
w2 gradient:  0.16683971376113468 numerical estimation:  -0.16645665623649109
b diff:  0.0031145395280669855
w1 diff:  0.0035986707590517364
w2 diff:  0.00038305752464359855


In [None]:



# NOTE(review): this cell relies on `model_b`, `eps`, `db`, `dw`, `db_est` and
# `dw_est` already existing in the kernel, and on the layers' d_b / d_w having
# been filled in beforehand -- `model_b` is not defined in the cells shown
# here; confirm before running on a fresh kernel.

# Hidden Layer 1
hidden_layer_1 = model_b.layers[0]
# Hidden Layer 2
hidden_layer_2 = model_b.layers[2]
# Output Layer
output_layer = model_b.layers[4]

# Bias checks: 1 output bias weight, then 1 bias weight per hidden layer.
input_idx = 0
output_idx = 1
bias_checks = [
    (output_layer, "Output bias weight diff", True),
    (hidden_layer_2, "Hidden Layer 2 bias weight diff", False),
    (hidden_layer_1, "Hidden Layer 1 bias weight diff", False),
]
for cur_layer, label, show_shape in bias_checks:
    db_est.append(Num_Est_b(model_b, cur_layer, eps, output_idx))
    if show_shape:
        print(cur_layer.d_b.shape)
    db.append(cur_layer.d_b[0][output_idx])
    print(f"{label}: {db_est[-1] - db[-1]:.6f}")

# Weight checks: 2 hidden to output weights, then 2 input to hidden weights.
# The hidden to output weights belong to `output_layer` (the same layer whose
# bias is the "output bias" above); `hidden_layer_2` holds the weights that
# feed the second hidden layer, so it is not the layer to perturb here.
weight_checks = [
    (output_layer, 0, 1, "Hidden to output weight diff 1", True),
    (output_layer, 0, 2, "Hidden to output weight diff 2", False),
    (hidden_layer_1, 0, 0, "Input to hidden 1 weight diff 1", False),
    (hidden_layer_1, 0, 2, "Input to hidden 2 weight diff 2", False),
]
for cur_layer, input_idx, output_idx, label, verbose in weight_checks:
    dw_est.append(Num_Est_w(model_b, cur_layer, eps, input_idx, output_idx))
    dw.append(cur_layer.d_w[input_idx][output_idx])
    if verbose:
        print("actual:", dw[-1])
        print("est:", dw_est[-1])
    print(f"{label}: {dw_est[-1] - dw[-1]:.6f}")


In [None]:
# Gradient check: compare the backprop d_b / d_w values with numerical estimates.

## Part (b): numerical estimation of the bias and weight gradients
def Num_Est_b(model, layer, eps, output_idx, inputs=None, targets=None):
    """Numerically estimate dE/db for one bias of ``layer`` (central difference).

    The bias ``layer.b[0][output_idx]`` is set to ``b + eps`` and then to
    ``b - eps``; the loss is re-evaluated with ``model.forward`` each time.
    The bias is always put back to its exact original value, even if the
    forward pass raises. The raw loss difference and the estimate are printed.

    Args:
        model: Object whose ``forward(inputs, targets)`` returns a sequence
            with the loss at index 1.
        layer: Layer whose ``b`` is indexable as ``b[0][output_idx]``.
        eps: Size of the perturbation.
        output_idx: Index of the bias to perturb.
        inputs: Inputs for the forward pass. Defaults to the notebook-level
            ``X_sub``.
        targets: Targets for the forward pass. Defaults to the notebook-level
            ``y_sub``.

    Returns:
        ``(E(b + eps) - E(b - eps)) / (2 * eps)``.
    """
    if inputs is None:
        inputs = X_sub
    if targets is None:
        targets = y_sub

    # Remember the exact value: `+= eps; -= 2*eps; += eps` is not guaranteed
    # to round-trip in floating point, so the model could drift between checks.
    original = layer.b[0][output_idx]
    try:
        layer.b[0][output_idx] = original + eps  # E(b + eps)
        loss_plus = model.forward(inputs, targets)[1]
        layer.b[0][output_idx] = original - eps  # E(b - eps)
        loss_minus = model.forward(inputs, targets)[1]
    finally:
        layer.b[0][output_idx] = original  # back to normal

    estimate = (loss_plus - loss_minus) / (2 * eps)
    print("diff:", loss_plus - loss_minus)
    print("loss:", estimate)
    return estimate

def Num_Est_w(model, layer, eps, input_idx, output_idx, inputs=None, targets=None):
    """Numerically estimate dE/dw for one weight of ``layer`` (central difference).

    The weight ``layer.w[input_idx][output_idx]`` is set to ``w + eps`` and
    then to ``w - eps``; the loss is re-evaluated with ``model.forward`` each
    time. The weight is always put back to its exact original value, even if
    the forward pass raises. The raw loss difference and the estimate are
    printed.

    Args:
        model: Object whose ``forward(inputs, targets)`` returns a sequence
            with the loss at index 1.
        layer: Layer whose ``w`` is indexable as ``w[input_idx][output_idx]``.
        eps: Size of the perturbation.
        input_idx: First index of the weight to perturb.
        output_idx: Second index of the weight to perturb.
        inputs: Inputs for the forward pass. Defaults to the notebook-level
            ``X_sub``.
        targets: Targets for the forward pass. Defaults to the notebook-level
            ``y_sub``.

    Returns:
        ``(E(w + eps) - E(w - eps)) / (2 * eps)``.
    """
    if inputs is None:
        inputs = X_sub
    if targets is None:
        targets = y_sub

    # Remember the exact value: `+= eps; -= 2*eps; += eps` is not guaranteed
    # to round-trip in floating point, so the model could drift between checks.
    original = layer.w[input_idx][output_idx]
    try:
        layer.w[input_idx][output_idx] = original + eps  # E(w + eps)
        loss_plus = model.forward(inputs, targets)[1]
        layer.w[input_idx][output_idx] = original - eps  # E(w - eps)
        loss_minus = model.forward(inputs, targets)[1]
    finally:
        layer.w[input_idx][output_idx] = original  # back to normal

    estimate = (loss_plus - loss_minus) / (2 * eps)
    print("diff:", loss_plus - loss_minus)
    print("loss:", estimate)
    return estimate

# NOTE(review): `model_b` is not defined in the cells shown here, and the
# layers' d_b / d_w must already have been filled in before this cell runs --
# confirm before running on a fresh kernel.
print("Layers:", len(model_b.layers))

# Parameters to check:
#   - 1 output bias weight
#   - 1 hidden bias weight for each hidden layer
#   - 2 hidden to output weights
#   - 2 input to hidden weights
# Goal: show that the numerical gradient is within O(eps^2) of the backprop one.
db = []
dw = []
db_est = []
dw_est = []

eps = 0.0001
# Hidden Layer 1
hidden_layer_1 = model_b.layers[0]
# Hidden Layer 2
hidden_layer_2 = model_b.layers[2]
# Output Layer
output_layer = model_b.layers[4]

# Bias checks: 1 output bias weight, then 1 bias weight per hidden layer.
input_idx = 0
output_idx = 1
bias_checks = [
    (output_layer, "Output bias weight diff", True),
    (hidden_layer_2, "Hidden Layer 2 bias weight diff", False),
    (hidden_layer_1, "Hidden Layer 1 bias weight diff", False),
]
for cur_layer, label, show_shape in bias_checks:
    db_est.append(Num_Est_b(model_b, cur_layer, eps, output_idx))
    if show_shape:
        print(cur_layer.d_b.shape)
    db.append(cur_layer.d_b[0][output_idx])
    print(f"{label}: {db_est[-1] - db[-1]:.6f}")

# Weight checks: 2 hidden to output weights, then 2 input to hidden weights.
# The hidden to output weights belong to `output_layer` (the same layer whose
# bias is the "output bias" above); `hidden_layer_2` holds the weights that
# feed the second hidden layer, so it is not the layer to perturb here.
weight_checks = [
    (output_layer, 0, 1, "Hidden to output weight diff 1", True),
    (output_layer, 0, 2, "Hidden to output weight diff 2", False),
    (hidden_layer_1, 0, 0, "Input to hidden 1 weight diff 1", False),
    (hidden_layer_1, 0, 2, "Input to hidden 2 weight diff 2", False),
]
for cur_layer, input_idx, output_idx, label, verbose in weight_checks:
    dw_est.append(Num_Est_w(model_b, cur_layer, eps, input_idx, output_idx))
    dw.append(cur_layer.d_w[input_idx][output_idx])
    if verbose:
        print("actual:", dw[-1])
        print("est:", dw_est[-1])
    print(f"{label}: {dw_est[-1] - dw[-1]:.6f}")
