## Classification (b)

In [18]:
from neuralnet import *
from random import shuffle
import numpy as np

# Load the configuration.
config = load_config("./data")

X_train, y_train, X_stats = load_data(path="./data", stats=None, mode="train")

# Get 10 examples, 1 from each category.
X_sub = []
y_sub = []
for k in range(10):
    indices = y_train[:,k] == 1
    X_sub.append(X_train[indices][0])
    y_sub.append(y_train[indices][0])
print(len(X_sub), len(y_sub))
X_sub = np.array(X_sub)
y_sub = np.array(y_sub)

# Load model para
config_prob_b = {}
config_prob_b['layer_specs'] = [3072, 64,64, 10]
config_prob_b['activation'] = 'tanh'
config_prob_b['learning_rate'] = 0.001
config_prob_b['batch_size'] = 128 
config_prob_b['epochs'] = 100  
config_prob_b['early_stop'] = True 
config_prob_b['early_stop_epoch'] = 5  
config_prob_b['L2_penalty'] = 0 
config_prob_b['momentum'] = True
config_prob_b['momentum_gamma'] = 0.9  



(50000, 32, 32, 3)
10 10


In [10]:
# d_b d_w comparison

## Part (b) Estimation of bias weight and weight
def Num_Est_b(model, layer, eps, output_idx):
    layer.b[0][output_idx] += eps # E(w+e)
    loss_1 = model.forward(X_sub, y_sub)[1]
    layer.b[0][output_idx] -= 2*eps # E(w-e)
    loss_2 = model.forward(X_sub, y_sub)[1]
    layer.b[0][output_idx] += eps # back to normal
    #print("diff:", loss_1 - loss_2)
    #print("loss:", (loss_1 - loss_2) / (2 * eps))
    return (loss_1 - loss_2) / (2 * eps)  # Numerical estimation for dEdW

def Num_Est_w(model, layer, eps, input_idx, output_idx):
    layer.w[input_idx][output_idx] += eps # E(w+e)
    loss_1 = model.forward(X_sub, y_sub)[1]
    layer.w[input_idx][output_idx] -= 2*eps # E(w-e)
    loss_2 = model.forward(X_sub, y_sub)[1]
    layer.w[input_idx][output_idx] += eps # back to normal
    #print("diff:", loss_1 - loss_2)
    #print("loss:", (loss_1 - loss_2) / (2 * eps))
    return (loss_1 - loss_2) / (2 * eps)  # Numerical estimation for dEdW

model_b = Neuralnetwork(config_prob_b)

loss_train = model_b.forward(X_sub, targets=y_sub)
model_b.backward()

print("Layers:", len(model_b.layers))
# Weights to modify:
# 1 output bias weight
# 1 hidden bias weight for each hidden layer
# 2 hidden to output weights
# 2 input to hidden weights
# Show that the grad between is within O(eps^2) of backprop weights.
db = []
dw = []
db_est = []
dw_est = []

eps = 0.01

print(f"epsilon used, expected diff: {eps , eps**2}")


# Hidden Layer 1
hidden_layer_1 = model_b.layers[0]
# Hidden Layer 2
hidden_layer_2 = model_b.layers[2]
# Output Layer
output_layer = model_b.layers[4]

## Bias
# 1 output bias weight
input_idx = 0
output_idx = 1
cur_layer = output_layer
print(cur_layer.d_b.shape)

db_est.append(Num_Est_b(model_b, cur_layer, eps, output_idx))
db.append(cur_layer.d_b[output_idx]) 
print("actual:", db[-1])
print("est:", db_est[-1])
print(f"Output bias weight diff: {db_est[-1] - db[-1]:.10f}")

# 1 hidden bias for hidden layer 2
cur_layer = hidden_layer_2
db_est.append(Num_Est_b(model_b, cur_layer, eps, output_idx))
db.append(cur_layer.d_b[output_idx]) 
print("actual:", db[-1])
print("est:", db_est[-1])
print(f"Hidden Layer 2 bias weight diff: {db_est[-1] - db[-1]:.10f}")

# 1 hidden bias for hidden layer 1
cur_layer = hidden_layer_1
db_est.append(Num_Est_b(model_b, cur_layer, eps, output_idx))
db.append(cur_layer.d_b[output_idx]) 
print("actual:", db[-1])
print("est:", db_est[-1])
print(f"Hidden Layer 1 bias weight diff: {db_est[-1] - db[-1]:.10f}")


## Weight

# 2 hidden to output weights
input_idx = 0
output_idx = 1
cur_layer = output_layer
dw_est.append(Num_Est_w(model_b, cur_layer, eps, input_idx, output_idx))
dw.append(cur_layer.d_w[input_idx][output_idx])
print("actual:", dw[-1])
print("est:", dw_est[-1])
print(f"Hidden2 to output weight diff 1: {dw_est[-1] - dw[-1]:.10f}")
input_idx = 0
output_idx = 2
dw_est.append(Num_Est_w(model_b, cur_layer, eps, input_idx, output_idx))
dw.append(cur_layer.d_w[input_idx][output_idx])
print("actual:", dw[-1])
print("est:", dw_est[-1])
print(f"Hidden2 to output weight diff 2: {dw_est[-1] - dw[-1]:.10f}")


# 1 hidden to 2 hidden weights
input_idx = 0
output_idx = 1
cur_layer = hidden_layer_2
dw_est.append(Num_Est_w(model_b, cur_layer, eps, input_idx, output_idx))
dw.append(cur_layer.d_w[input_idx][output_idx])
print("actual:", dw[-1])
print("est:", dw_est[-1])
print(f"Hidden1 to Hidden2 weight diff 1: {dw_est[-1] - dw[-1]:.10f}")
input_idx = 0
output_idx = 2
dw_est.append(Num_Est_w(model_b, cur_layer, eps, input_idx, output_idx))
dw.append(cur_layer.d_w[input_idx][output_idx])
print("actual:", dw[-1])
print("est:", dw_est[-1])
print(f"Hidden1 to Hidden2 weight diff 2: {dw_est[-1] - dw[-1]:.10f}")

# 2 input to hidden weights
input_idx = 0
output_idx = 1
cur_layer = hidden_layer_1
dw_est.append(Num_Est_w(model_b, cur_layer, eps, input_idx, output_idx))
dw.append(cur_layer.d_w[input_idx][output_idx])
print("actual:", dw[-1])
print("est:", dw_est[-1])
print(f"Input to hidden 1 weight diff 1: {dw_est[-1] - dw[-1]:.10f}")
input_idx = 0
output_idx = 2
dw_est.append(Num_Est_w(model_b, cur_layer, eps, input_idx, output_idx))
dw.append(cur_layer.d_w[input_idx][output_idx])
print("actual:", dw[-1])
print("est:", dw_est[-1])
print(f"Input to hidden 2 weight diff 2: {dw_est[-1] - dw[-1]:.10f}")


Layers: 5
epsilon used, expected diff: (0.01, 0.0001)
(10,)
actual: -0.03165755023252677
est: -0.0032040621349538334
Output bias weight diff: 0.0284534881
actual: 7.325320419415491e-05
est: 7.315655004003219e-06
Hidden Layer 2 bias weight diff: -0.0000659375
actual: -0.021807368175824994
est: -0.002176287567512425
Hidden Layer 1 bias weight diff: 0.0196310806
actual: -0.16683971376113468
est: -0.01664568843646741
Hidden2 to output weight diff 1: 0.1501940253
actual: 0.003184207354738211
est: 0.00019100993028375868
Hidden2 to output weight diff 2: -0.0029931974
actual: -7.363770472256348e-05
est: -7.354105180379378e-06
Hidden1 to Hidden2 weight diff 1: 0.0000662836
actual: -0.10628993898014337
est: -0.010628584376004557
Hidden1 to Hidden2 weight diff 2: 0.0956613546
actual: 0.024485076445083118
est: 0.002443754042669166
Input to hidden 1 weight diff 1: -0.0220413224
actual: 8.305234966487929e-11
est: 8.304468224196171e-12
Input to hidden 2 weight diff 2: -0.0000000001


In [11]:
for layer in model_b.layers:
    if isinstance(layer, Layer):
        d_b.append(layer.d_b[1] * 10) # multiply by the scaling factor
        d_w.append([np.multiply(layer.d_w[0][1], 10) ,np.multiply(layer.d_w[0][2], 10)]) # multiply by the scaling factor
print('Real b: {}'.format(d_b))
print('Estimate b: {}'.format(d_b_estimate))
print('Real w: {}'.format(d_w))
print('Estimate w: {}'.format( d_w_estimate))

NameError: name 'd_b' is not defined