# Debug MNIST

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
from tensorflow.examples.tutorials.mnist import input_data
import pdb

np.random.seed(0)

# Load Data

In [None]:
mnist = input_data.read_data_sets('MNIST-Dataset', one_hot=True)

In [None]:
train_x_raw = mnist.train.images
train_y_raw = mnist.train.labels
valid_x_raw = mnist.validation.images
valid_y_raw = mnist.validation.labels
test_x_raw = mnist.test.images
test_y_raw = mnist.test.labels

## Explore Data

In [None]:
def plot_mnist(data_x, data_y, n):
    """Show the first `n` samples as 28x28 images, each titled with its class index.

    Params:
        data_x - 2d array, row i is reshaped to [28,28] and drawn as an image
        data_y - 2d array of labels, row i belongs to data_x[i]
                 (assumed one-hot; the index of the largest entry is used as title)
        n - number of leading samples to draw, laid out 8 per row
    """
    n_cols = 8
    # Round the row count up: n//8 gave zero rows for n < 8 and too few rows
    # for any n that is not a multiple of 8, which makes add_subplot fail.
    n_rows = max(1, -(-n // n_cols))

    fig = plt.figure(figsize=[16,9])
    for i in range(n):
        ax = fig.add_subplot(n_rows, n_cols, i+1)
        ax.imshow(data_x[i].reshape([28,28]))
        ax.axis('off')
        # argmax yields the hot index directly as a scalar; int() on the
        # index array returned by np.nonzero is deprecated in recent NumPy.
        idx = int(np.argmax(data_y[i]))
        ax.set_title(idx)
    plt.show()

In [None]:
plot_mnist(train_x_raw, train_y_raw, 32)

In [None]:
plot_mnist(valid_x_raw, valid_y_raw, 8)

In [None]:
plot_mnist(test_x_raw, test_y_raw, 8)

In [None]:
plt.hist(train_x_raw[0:100].flatten(), bins=100);

In [None]:
print('train mean:', train_x_raw.mean(), 'std', train_x_raw.std())
print('valid mean:', valid_x_raw.mean(), 'std', valid_x_raw.std())
print('test mean:', test_x_raw.mean(), 'std', test_x_raw.std())

## Preprocess Data

In [None]:
# Standardise every split with the mean/std of the *training* images only,
# so validation and test data are scaled exactly like the training data.
tx_mean = train_x_raw.mean()
tx_std = train_x_raw.std()
train_x = (train_x_raw - tx_mean)/tx_std
valid_x = (valid_x_raw - tx_mean)/tx_std
test_x = (test_x_raw - tx_mean)/tx_std
# Labels are used unchanged.
train_y = train_y_raw
valid_y = valid_y_raw
test_y = test_y_raw
# Sanity check: statistics and shapes after scaling.
print('train mean:', train_x.mean(), 'std', train_x.std())
print('valid mean:', valid_x.mean(), 'std', valid_x.std())
print('test mean:', test_x.mean(), 'std', test_x.std())
print(train_x.shape, train_y.shape)
print(valid_x.shape, valid_y.shape)
print(test_x.shape, test_y.shape)

In [None]:
plot_mnist(train_x, train_y, 8)

# Fast import/restart

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
from tensorflow.examples.tutorials.mnist import input_data
import pdb

np.random.seed(0)

# Same loading and scaling steps as the "Load Data" / "Preprocess Data" cells,
# collected in one cell so the notebook can be restarted without the plots.
mnist = input_data.read_data_sets('MNIST-Dataset', one_hot=True)

train_x_raw = mnist.train.images
train_y_raw = mnist.train.labels
valid_x_raw = mnist.validation.images
valid_y_raw = mnist.validation.labels
test_x_raw = mnist.test.images
test_y_raw = mnist.test.labels

# Preprocess: standardise all splits with the training-set mean/std
tx_mean = train_x_raw.mean()
tx_std = train_x_raw.std()
train_x = (train_x_raw - tx_mean)/tx_std
valid_x = (valid_x_raw - tx_mean)/tx_std
test_x = (test_x_raw - tx_mean)/tx_std
train_y = train_y_raw
valid_y = valid_y_raw
test_y = test_y_raw

## Build Model

In [None]:
def sigmoid(x, deriv=False):
    """Logistic function 1/(1+exp(-x)), or its derivative s*(1-s) when deriv=True.

    Params:
        x - scalar or array of pre-activations
        deriv - if True return the derivative evaluated at x instead of the value
    """
    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1], so it can never overflow; the plain
    # 1/(1+exp(-x)) form overflows inside exp for large negative x.
    e = np.exp(-np.abs(x))
    s = np.where(x >= 0, 1 / (1 + e), e / (1 + e))
    if deriv:
        # reuse s instead of evaluating the sigmoid twice
        s = s * (1 - s)
    # [()] turns a 0-d result back into a scalar and leaves arrays untouched
    return s[()]

def tanh(x, deriv=False):
    """Hyperbolic tangent activation; with deriv=True return 1 - tanh(x)**2 instead."""
    if not deriv:
        return np.tanh(x)
    return 1. - np.tanh(x)**2

def softssign(x, deriv=False):
    """Softsign activation x/(1+|x|), or its derivative 1/(1+|x|)**2 when deriv=True.

    Params:
        x - scalar or array of pre-activations
        deriv - if True return the derivative evaluated at x instead of the value
    """
    dd = 1 + np.abs(x)
    if deriv:
        # The numerator (1+|x|) - x*sign(x) is always exactly 1. Writing it as
        # a subtraction cancels to 0 once |x| is so large that 1+|x| == |x|
        # in floating point, so use the constant directly.
        return 1 / dd**2
    return x / dd

def relu(x, deriv=False):
    """Rectified linear unit max(0, x); with deriv=True return 1. where x > 0, else 0."""
    if not deriv:
        return np.maximum(0, x)
    is_positive = x > 0
    return is_positive * 1.

def lrelu(x, deriv=False):
    """Leaky ReLU with slope 0.01 on the negative side.

    Value: x where x > 0, otherwise 0.01*x.
    Derivative (deriv=True): 0.01 where x < 0, otherwise 1.
    """
    if not deriv:
        return np.where(x > 0, x, x * 0.01)
    slope = np.ones_like(x)
    slope[x < 0] = 0.01
    return slope

In [None]:


def MSE(y, y_hat):
    """Half squared error, summed over the outputs and averaged over the batch.

    Params:
        y - 2d array of targets, dims: [batch, outputs]
        y_hat - 2d array of predictions, same dims as y
    Returns:
        scalar loss 0.5 * mean_over_batch( sum_over_outputs( (y - y_hat)**2 ) )
    """
    assert y.ndim == 2
    assert y_hat.ndim == 2

    # avg over batch, sum over outputs (inner).
    # (An unreachable single-output variant without the inner sum used to
    # follow this return; it could never run and has been removed.)
    return .5 * np.mean(np.sum((y-y_hat)**2, axis=-1))

def acc(y, y_hat):
    """Fraction of rows whose arg-max in y_hat equals the arg-max in y."""
    predicted = np.argmax(y_hat, axis=-1)
    expected = np.argmax(y, axis=-1)
    return np.mean(predicted == expected)

def fwd(x, W_hid, W_out, act_fun, ret=False):
    """Forward pass: hidden layer with `act_fun`, output layer always with sigmoid.

    Params:
        x - 2d array of inputs, one sample per row
        W_hid - hidden-layer weight matrix, applied as x @ W_hid
        W_out - output-layer weight matrix, applied as hidden @ W_out
        act_fun - hidden activation, called as act_fun(z)
        ret - if True also return the intermediate values
    Returns:
        y_hat, or (y_hat, z_hid, h_hid, z_out) when ret is True
    """
    assert x.ndim == 2

    pre_hidden = x @ W_hid
    hidden = act_fun(pre_hidden)       # hidden output

    pre_output = hidden @ W_out
    prediction = sigmoid(pre_output)   # output layer is hard-wired to sigmoid

    return (prediction, pre_hidden, hidden, pre_output) if ret else prediction

def backprop(x, y, W_hid, W_out, act_fun):
    """Gradients of the loss w.r.t. both weight matrices, averaged over the batch.

    Params:
        x - 2d array of inputs, one sample per row
        y - 2d array of targets, one sample per row
        W_hid, W_out - weight matrices as used by fwd()
        act_fun - hidden activation; must accept deriv=True
    Returns:
        (dW_hid, dW_out)
    """
    assert x.ndim == 2
    assert y.ndim == 2

    n_samples = len(x)
    y_hat, z_hid, h_hid, z_out = fwd(x, W_hid, W_out, act_fun, ret=True)

    # Output layer: error times derivative of the (sigmoid) output unit
    delta_out = -(y - y_hat) * sigmoid(z_out, deriv=True)
    grad_out = (h_hid.T @ delta_out) / n_samples

    # Hidden layer: push the output delta back through W_out
    back_signal = delta_out @ W_out.T
    delta_hid = back_signal * act_fun(z_hid, deriv=True)
    grad_hid = (x.T @ delta_hid) / n_samples

    return grad_hid, grad_out


    

## Numerical gradient check (optional)

In [None]:
def ngrad(x, y, W_hid, W_out, act_fun, eps=1e-6):
    """Numerical gradient of MSE(y, fwd(x, ...)) w.r.t. both weight matrices.

    Every weight is perturbed by +eps and -eps in turn and the gradient is
    estimated with the central difference (loss_plus - loss_minus) / (2*eps).
    This needs two forward passes per weight, so only use it on tiny batches
    and small networks.

    Params:
        x, y - inputs / targets, passed to fwd() and MSE() unchanged
        W_hid, W_out - weight matrices; they are not modified
        act_fun - hidden activation, passed to fwd()
        eps - perturbation size (default 1e-6, the previously hard-coded value)
    Returns:
        (gW_hid, gW_out), same shapes as W_hid and W_out
    """
    def central_diff(W, loss_of):
        """Central-difference gradient of loss_of(W) for every element of W."""
        grad = np.zeros_like(W)
        # One scratch copy that is perturbed and restored element by element,
        # instead of two full copies of the matrix for every single weight.
        W_work = W.copy()
        for idx in np.ndindex(*W.shape):
            w0 = W_work[idx]
            W_work[idx] = w0 + eps
            loss_plus = loss_of(W_work)
            W_work[idx] = w0 - eps
            loss_minus = loss_of(W_work)
            W_work[idx] = w0     # restore the exact original value
            grad[idx] = (loss_plus-loss_minus) / (2*eps)
        return grad

    gW_hid = central_diff(W_hid, lambda W: MSE(y, fwd(x, W, W_out, act_fun)))
    gW_out = central_diff(W_out, lambda W: MSE(y, fwd(x, W_hid, W, act_fun)))
    return gW_hid, gW_out

In [None]:
# NOTE: W_hid, W_out and act_fun are created in the "Train Loop" setup cell
# further down; that cell has to be executed before this one.
# Only 3 samples are used because ngrad does two forward passes per weight.
dW_hid, dW_out = backprop(train_x[0:3], train_y[0:3], W_hid, W_out, act_fun)
ngW_hid, ngW_out = ngrad(train_x[0:3], train_y[0:3], W_hid, W_out, act_fun)

In [None]:
# Analytic (backprop) and numerical gradients must agree within np.allclose tolerance
assert np.allclose(dW_hid, ngW_hid)
assert np.allclose(dW_out, ngW_out)

## Train Loop - with traces

In [None]:
# Network size and training hyper-parameters
n_in = 784
n_hid = 128 # 12
n_out = 10
lr = 0.01  # 0.55

n_batch = 100
act_fun = sigmoid

# Re-seed so that the weight initialisation below is reproducible
np.random.seed(0)

# Initialize weights: zero-mean normal, std = 1/sqrt(number of inputs of the layer)
W_hid = np.random.normal(0.0, n_in**-.5, [n_in, n_hid])
W_out = np.random.normal(0.0, n_hid**-.5, [n_hid, n_out])

# W_hid = np.random.uniform(0.0, .01, [n_in, n_hid])
# W_out = np.random.uniform(0.0, .01, [n_hid, n_out])

# Histories filled by the training loop. Only the 'batch' entries are
# appended there; 'train' and 'valid' stay empty unless the commented-out
# full-dataset evaluation in the loop is re-enabled.
losses = {'batch':[], 'train':[], 'valid':[]}
accurs = {'batch':[], 'train':[], 'valid':[]}
# Per-epoch lists of traced pre-activations and weight snapshots
traces = {'z_hid':[], 'z_out':[],
          #'dW_hid':[], 'dW_out':[],
          'W_hid':[], 'W_out':[]}

In [None]:
# NOTE(review): this overrides the lr set in the setup cell. With lr = 0.0 the
# updates `W += -lr * dW` in the training loop leave the weights unchanged, so
# nothing is learned. Skip this cell for a real training run - confirm intent.
lr = 0.0

In [None]:
len(train_x)//10

In [None]:
# Mini-batch gradient descent for 10 epochs, recording per-batch loss/accuracy
# and traces of pre-activations (every batch) and weights (every 10th batch).
ti_ = 0    # global batch counter across epochs, used to thin out weight traces
train_i = np.array(range(len(train_x)))
for e in range(10):
    print(e)
    # New sample order every epoch
    np.random.shuffle(train_i)
    
    # Start a fresh per-epoch list in every trace
    for k, v in traces.items():
        v.append([])
    
    for i in range(0, len(train_x), n_batch):
        # print(i,',', end='')
        # Get the next mini-batch of up to n_batch shuffled samples, both as 2d arrays
        batch = train_i[i:i+n_batch]
        x = train_x[batch]
        y = train_y[batch]

        # Forward pass (only used for the metrics and traces below;
        # backprop() runs its own forward pass again)
        y_hat, z_hid, _, z_out = fwd(x, W_hid, W_out, act_fun, ret=True)
        
        # Calc batch loss (before update!)
        losses['batch'].append(MSE(y, y_hat))
        accurs['batch'].append(acc(y, y_hat))

        # Backpropagation and in-place gradient descent step
        dW_hid, dW_out = backprop(x, y, W_hid, W_out, act_fun)
        W_hid += -lr * dW_hid
        W_out += -lr * dW_out

        # Calc full loss, usually only every epoch or so
        # (disabled: losses['train'] and losses['valid'] are never filled)
#         if i == 0:
#             train_y_hat = fwd(train_x, W_hid, W_out, act_fun)
#             train_loss = MSE(train_y, train_y_hat)
#             losses['train'].append(train_loss)

#         valid_y_hat = fwd(valid_x, W_hid, W_out, act_fun)
#         valid_loss = MSE(valid_y, valid_y_hat)
#         losses['valid'].append(valid_loss)

        # Trace
#         traces['z_hid'][-1] += list(z_hid)
#         traces['z_out'][-1] += list(z_out)
        
        # One [batch, neurons] array per mini-batch
        traces['z_hid'][-1].append(z_hid)
        traces['z_out'][-1].append(z_out)
        
        # Weight snapshots only every 10th batch; copies, because the
        # weights are updated in place
        if ti_ % 10 == 0:
        #traces['dW_hid'][-1].append(dW_hid)
        #traces['dW_out'][-1].append(dW_out)
            traces['W_hid'][-1].append(W_hid.copy())
            traces['W_out'][-1].append(W_out.copy())
        
        ti_ += 1

# Stack the nested per-epoch lists into arrays.
# Later cells unpack these as 4-d: (epoch, batch-or-snapshot, rows, columns).
tr_z_hid = np.array(traces['z_hid'])
tr_z_out = np.array(traces['z_out'])
#tr_dW_hid = np.array(traces['dW_hid'])
#tr_dW_out = np.array(traces['dW_out'])
tr_W_hid = np.array(traces['W_hid'])
tr_W_out = np.array(traces['W_out'])

print('tr_z_hid', tr_z_hid.shape)
print('tr_z_out', tr_z_out.shape)
#print('tr_dW_hid', tr_dW_hid.shape)
#print('tr_dW_out', tr_dW_out.shape)
print('tr_W_hid', tr_W_hid.shape)
print('tr_W_out', tr_W_out.shape)

In [None]:
# losses['train'] / losses['valid'] are only filled when the full-dataset
# evaluation in the training loop is enabled; indexing [-1] on an empty
# history raised IndexError, so report missing histories instead.
for split in ('train', 'valid'):
    history = losses[split]
    if history:
        print('Final ' + split + ' loss:', history[-1])
    else:
        print('Final ' + split + ' loss: not recorded')

In [None]:
train_y_hat = fwd(train_x, W_hid, W_out, act_fun)
train_loss = MSE(train_y, train_y_hat)
train_loss

In [None]:
valid_y_hat = fwd(valid_x, W_hid, W_out, act_fun)
valid_loss = MSE(valid_y, valid_y_hat)
valid_loss

# Loss, output

In [None]:
# Per-batch loss and accuracy over all training steps on one axis.
# The validation curve is empty unless losses['valid'] has been filled.
fig, ax = plt.subplots(figsize=[12,6])
ax.plot(losses['batch'], label='Mini-Batch loss')
#ax.plot(losses['train'], label='Training loss')
ax.plot(losses['valid'], label='Validation loss')
ax.plot(accurs['batch'], label='Mini-Batch accuracy', color='red')

ax.legend()
ax.set_ylim(0, 1)
ax.grid()

In [None]:
# Same plot as the previous cell (identical code), kept as a second copy.
fig, ax = plt.subplots(figsize=[12,6])
ax.plot(losses['batch'], label='Mini-Batch loss')
#ax.plot(losses['train'], label='Training loss')
ax.plot(losses['valid'], label='Validation loss')
ax.plot(accurs['batch'], label='Mini-Batch accuracy', color='red')

ax.legend()
ax.set_ylim(0, 1)
ax.grid()

Reference

In [None]:
# Per-class totals and number of correct predictions on the training set
y_hat = fwd(train_x, W_hid, W_out, act_fun)
# True where the predicted class equals the labelled class
y_correct = np.argmax(train_y, axis=-1) == np.argmax(y_hat, axis=-1)
classes_correct = []
classes_all = []
for i in range(10):
    # correct AND labelled as class i
    is_y_class_i = y_correct * (np.argmax(train_y, axis=-1)==i)
    nb_correct_class_i = np.sum(is_y_class_i)
    classes_correct.append(nb_correct_class_i)
    # all samples labelled as class i
    classes_all.append(np.count_nonzero(np.argmax(train_y,axis=-1)==i))
classes_correct = np.array(classes_correct)
classes_all = np.array(classes_all)

In [None]:
plt.bar(range(10), classes_all, label='All Member')
plt.bar(range(10), classes_correct, label='Correctly Predicted')
plt.legend(loc=3)

# Weight Plots

In [None]:
def running_mean(x, n):
    """Trailing mean: element i is the mean of x[max(i-n+1, 0) : i+1].

    The window is shorter than n for the first n-1 positions.
    """
    means = []
    for end in range(1, len(x) + 1):
        start = max(end - n, 0)
        means.append(np.mean(x[start:end]))
    return np.array(means)

In [None]:
def running_std(x, n):
    """Trailing standard deviation: element i is np.std of x[max(i-n+1, 0) : i+1].

    The window is shorter than n for the first n-1 positions.
    """
    stds = []
    for end in range(1, len(x) + 1):
        start = max(end - n, 0)
        stds.append(np.std(x[start:end]))
    return np.array(stds)

Plot output neuron weights

In [None]:
tr_W_out.shape

In [None]:
# Index of the output neuron whose incoming weights are plotted
neuron = 9

# tr_W_out dims as unpacked here: (epochs, snapshots per epoch, weight rows, weight columns)
ne, ni, nw, nn = tr_W_out.shape

fig = plt.figure(figsize=[16,6])
ax = fig.add_subplot(111)

# Merge epoch and snapshot axes into one time axis; one line per incoming weight
ax.plot(tr_W_out.reshape([ne*ni,nw,nn])[:,:,neuron])
plt.tight_layout()
plt.show()
# Do not leak the shape helpers into other cells
del ne, ni, nw, nn

Plot hidden neuron weights

In [None]:
# Range of hidden neurons (columns of W_hid) and of their incoming weights (rows) to plot
start_neuron = 0
end_neuron = 12
start_weight = 0
end_weight = 50

fig = plt.figure(figsize=[16,280])
ne, ni, nw, nn = tr_W_hid.shape
for i in range(start_neuron, end_neuron):
    # Subplot grid is 80 rows x 3 columns
    ax = fig.add_subplot(80,3,i+1)
    # Merge epoch and snapshot axes into one time axis
    tmp = tr_W_hid.reshape(ne*ni, nw, nn)
    ax.plot(tmp[:,start_weight:end_weight,i], alpha=0.5)
    ax.set_title('Neuron #'+str(i))
plt.tight_layout()
plt.show()

In [None]:
# Same as the previous cell, but for every hidden neuron and all of its incoming weights.
# The 80x3 grid has room for at most 240 neurons.
fig = plt.figure(figsize=[16,180])
ne, ni, nw, nn = tr_W_hid.shape
for i in range(nn):
    ax = fig.add_subplot(80,3,i+1)
    # Merge epoch and snapshot axes into one time axis
    tmp = np.reshape(tr_W_hid, (ne*ni, nw, nn))
    ax.plot(tmp[:,:,i], alpha=0.5)
    ax.set_title('Neuron #'+str(i))
plt.tight_layout()
plt.show()


# Activation Histograms

In [None]:
def plot_3d_histogram(data, funct=lambda x: x, color=(1,0,0,1), alpha=1, ax=None, figsize=None):
    """
    Draw one normalised 100-bin histogram line per row of `data`, stacked
    along the 'epoch' axis of a 3d plot.

    Params:
        data - 2d array, dims: [epochs, samples].
               E.g. for single neuron activations: [[ 0.1, 0.2, 0.3, ... ]  activations in epoch 0, sample 1, 2, 3...
                                                    [ 0.2, 0.1, 0.3, ... ]  activations in epoch 1, ...
                                                    ...
                                                    [ 0.3, 0.1, 0.2, ... ]] activations in last training epoch
        funct - function to apply to data before plotting, e.g. sigmoid, tanh, usually none
        color - RGBA tuple used for the last epoch; earlier epochs are
                interpolated linearly starting from color/4
        alpha - currently unused; kept so that existing calls keep working
        ax - 3d axes to draw on; when None a new figure with one 3d subplot is created
        figsize - size of that new figure, only used when ax is None
    """
    assert data.ndim==2
    nb_epochs = len(data)

    def interpolate_colors(cstart, cend, n):
        """Return an [n, 4] array of RGBA colors going linearly from cstart to cend."""
        cstart, cend = np.array(cstart), np.array(cend)
        assert cstart.shape == (4,)
        assert cend.shape == (4,)
        if n == 1:
            # One step: just the end color, but still as a [1, 4] array so
            # that indexing by epoch yields a full RGBA color (returning the
            # flat color made colors[0] a single number).
            return cend[np.newaxis, :]
        steps = (np.arange(n) / (n-1))[:, np.newaxis]
        return (1-steps)*cstart + steps*cend

    color_end = np.array(color, dtype=float)
    color_start = color_end / 4      # darker and more transparent version of `color`
    colors = interpolate_colors(color_start, color_end, nb_epochs)

    if ax is None:
        fig = plt.figure(figsize=figsize)
        ax = fig.add_subplot(111, projection='3d')

    ax.set_xlabel('value'); ax.set_ylabel('epoch'); ax.set_zlabel('n')
    ax.view_init(30, -85)
    # ax.set_xlim(0, 1)

    for epoch in range(nb_epochs):                                     # One line per epoch
        hist, bins = np.histogram(funct(data[epoch,:]), bins=100)      # apply funct and create histogram
        bins = (bins[:-1] + bins[1:])/2                                # center bins
        hist = hist / np.sum(hist)                                     # fractions instead of counts
        ax.plot(xs=bins, ys=hist,
                zs=-epoch,
                zdir='y',
                color=colors[epoch])
        if epoch == 0:
            # Reference lines along the epoch axis at value 0 (solid) and 1 (dashed)
            #ax.plot(xs=[-1,-1], ys=[0,0], zs=[-nb_epochs,0], zdir='y', color='k', ls='--')
            ax.plot(xs=[0,0], ys=[0,0], zs=[-nb_epochs,0], zdir='y', color='k')
            ax.plot(xs=[1,1], ys=[0,0], zs=[-nb_epochs,0], zdir='y', color='k', ls='--')
        if epoch == nb_epochs-1:
            # Base line behind the last epoch, spanning its bin range
            ax.plot(xs=[bins[0],bins[-1]], ys=[0,0], zs=-nb_epochs, zdir='y', color='k')

In [None]:
# Mean and spread of output and hidden activations per training step
fig = plt.figure(figsize=[16,9])
ax = fig.add_subplot(111)

ne, ni, na, nn = tr_z_out.shape
# One row per traced mini-batch, all samples and neurons flattened together
do = tr_z_out.reshape([ne*ni,-1])
# NOTE(review): applies act_fun to the output pre-activations, whereas fwd()
# always uses sigmoid for the output layer; only equivalent while act_fun is sigmoid
do = act_fun(do)
xx = list(range(ne*ni))
yy = np.mean(do, axis=-1)
err = np.std(do, axis=-1)
line = ax.plot(xx, yy, label='Output mean/std')[0]
ax.errorbar(xx, yy, err, alpha=0.1, ls='none', color=line.get_color())

ne, ni, na, nn = tr_z_hid.shape
# Same for the hidden layer
dd = tr_z_hid.reshape([ne*ni,-1])
dd = act_fun(dd)
xx = list(range(ne*ni))
yy = np.mean(dd, axis=-1)
err = np.std(dd, axis=-1)
line = ax.plot(xx, yy, label='Hidden mean/std')[0]
ax.errorbar(xx, yy, err, alpha=0.1, ls='none', color=line.get_color())

#ax.set_ylim(0,1)

plt.legend()
plt.tight_layout(); plt.show()

In [None]:
# One 3d histogram of sigmoid(z_hid) per hidden neuron, on an 80x4 subplot grid
fig = plt.figure(figsize=[16,200])

ne, ni, na, nn = tr_z_hid.shape
# Merge batch and sample axes: (epoch, all samples of the epoch, neuron)
tmp_act_hid = tr_z_hid.reshape([ne,ni*na,nn])

for n in range(tr_z_hid.shape[-1]):
    ax = fig.add_subplot(80, 4, n+1, projection='3d')
    plot_3d_histogram(tmp_act_hid[::,:,n], funct=lambda x: sigmoid(x), ax=ax)
    ax.set_title('Neuron #' + str(n))

plt.tight_layout()
plt.show()

In [None]:
# Whole hidden layer at once: raw pre-activations (left) and after sigmoid (right)
fig = plt.figure(figsize=[16,6])

ax = fig.add_subplot(121, projection='3d')
ne, ni, na, nn = tr_z_hid.shape
# One row per epoch, everything else flattened
dd = tr_z_hid.reshape([ne,-1])
plot_3d_histogram(dd, funct=lambda x: x, color=(0,0,0,1), ax=ax)

ax = fig.add_subplot(122, projection='3d')
plot_3d_histogram(dd, funct=lambda x: sigmoid(x), color=(1,0,0,1), ax=ax)


plt.tight_layout()
plt.show()

In [None]:
# Whole output layer at once: raw pre-activations (left) and after sigmoid (right)
fig = plt.figure(figsize=[16,6])

ax = fig.add_subplot(121, projection='3d')
ne, ni, na, nn = tr_z_out.shape
# One row per epoch, everything else flattened
dd = tr_z_out.reshape([ne,-1])
plot_3d_histogram(dd, funct=lambda x: x, color=(0,0,0,1), ax=ax)

ax = fig.add_subplot(122, projection='3d')
plot_3d_histogram(dd, funct=lambda x: sigmoid(x), color=(0,0,1,1), ax=ax)


plt.tight_layout()
plt.show()

In [None]:
# Per output neuron: histogram of pre-activations, of sigmoid outputs, and of
# the target values for that same neuron, side by side (10 rows x 3 columns).
fig = plt.figure(figsize=[16,45])

ne, ni, na, nn = tr_z_out.shape
# Merge batch and sample axes: (epoch, all samples of the epoch, neuron)
tmp_act_out = tr_z_out.reshape([ne,ni*na,nn])

for n in range(tr_z_out.shape[-1]):
    
    ax = fig.add_subplot(10, 3, (n*3)+1, projection='3d')
    plot_3d_histogram(tmp_act_out[::,:,n], funct=lambda x: x, color=(0,0,0,1), ax=ax)
    ax.set_title('Neuron #' + str(n))
    
    ax = fig.add_subplot(10, 3, (n*3)+2, projection='3d')
    plot_3d_histogram(tmp_act_out[::,:,n], funct=lambda x: sigmoid(x), color=(0,0,1,1), ax=ax)
    ax.set_title('Neuron #' + str(n))
    
    ax = fig.add_subplot(10, 3, (n*3)+3)
    # Targets of neuron n (column 0 was used for every row before, so all
    # ten target histograms showed the labels of class 0)
    hist, bins = np.histogram(train_y[:,n])
    bins = (bins[:-1] + bins[1:])/2    # center bins
    hist = hist / sum(hist)
    ax.bar(bins,hist, width=0.1)

plt.tight_layout()
plt.show()

# Other

In [None]:
# Relative change of the output weights between consecutive snapshots.
# Flatten (epoch, snapshot) into one time axis and keep the real weight-matrix
# dims; the hard-coded reshape([-1,12,1]) only fitted a 12x1 output layer.
w_steps = tr_W_out.reshape((-1,) + tr_W_out.shape[-2:])
aa_W_out = w_steps[1:]     # snapshot t+1
bb_W_out = w_steps[:-1]    # snapshot t
#print('aa_W_out', aa_W_out.shape)
#print('bb_W_out', bb_W_out.shape)
diff_W_out = aa_W_out - bb_W_out
#print('diff_W_out', diff_W_out.shape)
relat_out = diff_W_out / aa_W_out
print('relat_out', relat_out.shape)

fig = plt.figure(figsize=[16,9])
ax = fig.add_subplot(111)

# First three incoming weights of output neuron 0
for i in range(3): #relat_out.shape[1]):
    to_plot = relat_out[:,i:i+1,0]
    #print('to_plot', to_plot.shape)
    
    
    # Raw values (faint) plus running mean and running std in the same color
    line = ax.plot(to_plot, alpha=0.1)[0]
    ax.plot(running_mean(to_plot, n=1000), color=line.get_color());
    ax.plot(running_std(to_plot, n=1000), color=line.get_color());
    ax.plot([0,len(to_plot)],[0.0, 0.0], linestyle='--', color='k')
    #ax.plot([0,900],[0.01, 0.01], linestyle='--')
    # ax.ylim(0, 0.2)
    # ax.set_yscale('log')
    ax.set_ylim([-.1,0.1])
plt.tight_layout()
plt.show()

In [None]:
tr_W_out.shape

In [None]:
# Relative change between consecutive snapshots *within* each epoch
# (slicing along axis 1 keeps the epoch axis intact)
aa_W_out = tr_W_out[:,1:,:,:]
bb_W_out = tr_W_out[:,:-1,:,:]
print('aa_W_out', aa_W_out.shape)
print('bb_W_out', bb_W_out.shape)
diff_W_out = aa_W_out - bb_W_out
print('diff_W_out', diff_W_out.shape)
relat_out = diff_W_out / aa_W_out
print('relat_out', relat_out.shape)

# Single weight [0,0] of the output matrix, dims: [epochs, snapshots-1]
out_weight = relat_out[:,:,0,0]
print('out_weight', out_weight.shape)

# Clip outliers so that the histogram range stays readable
plot_3d_histogram(np.clip(out_weight, -0.1, 0.1), figsize=[16,9])

In [None]:
# Relative change of the output weights between consecutive snapshots.
# Flatten (epoch, snapshot) into one time axis and keep the real weight-matrix
# dims; the hard-coded reshape([-1,12,1]) only fitted a 12x1 output layer.
w_steps = tr_W_out.reshape((-1,) + tr_W_out.shape[-2:])
aa_W_out = w_steps[1:]     # snapshot t+1
bb_W_out = w_steps[:-1]    # snapshot t
print('aa_W_out', aa_W_out.shape)
print('bb_W_out', bb_W_out.shape)
diff_W_out = aa_W_out - bb_W_out
print('diff_W_out', diff_W_out.shape)
relat_out = diff_W_out / aa_W_out
print('relat_out', relat_out.shape)  
# Last incoming weight of output neuron 0
to_plot = relat_out[:,-1:,0]
#print('to_plot', to_plot.shape)
fig = plt.figure(figsize=[16,3])
ax = fig.add_subplot(111)
ax.plot(to_plot, alpha=0.2);
ax.plot(running_mean(to_plot, n=100));
ax.plot(running_mean(np.abs(to_plot), n=100));
ax.plot(running_std(np.abs(to_plot), n=100));
# Reference lines at 0 and at 1% relative change
ax.plot([0,900],[0.0, 0.0], linestyle='--')
ax.plot([0,900],[0.01, 0.01], linestyle='--')
# ax.ylim(0, 0.2)
# ax.set_yscale('log')
ax.set_ylim([-.02,0.02])
plt.tight_layout()
plt.show()

In [None]:
def get_ratios(weights):
    """Relative change between consecutive weight snapshots.

    Params:
        weights - 3d array, dims: [snapshots, weight rows, weight columns]
    Returns:
        3d array with one snapshot less than `weights`:
        (weights[t] - weights[t+1]) / weights[t+1]
    """
    assert weights.ndim==3

    # The input is already [snapshots, rows, cols]; the former
    # reshape([-1,12,1]) only worked for 12x1 matrices and failed or
    # scrambled the snapshot axis for any other shape.
    aa_W = weights[1:]     # snapshot t+1
    bb_W = weights[:-1]    # snapshot t
    # NOTE(review): sign is (old - new); the notebook cells that compute the
    # ratio inline use (new - old). Kept as is so existing results don't flip.
    diff_W = bb_W - aa_W
    relat = diff_W / aa_W

    assert relat.ndim == 3
    return relat

In [None]:
# Flatten (epoch, snapshot) into one time axis and keep the real weight-matrix
# dims; the hard-coded reshape([-1,12,1]) only fitted a 12x1 output layer.
ratios = get_ratios(tr_W_out.reshape((-1,) + tr_W_out.shape[-2:]))
# Last (up to) 1000 steps, all incoming weights of output neuron 0
to_plot = ratios[-1000:,:,0]

print('to_plot', to_plot.shape)
plt.plot(np.abs(to_plot), alpha=0.5);
#plt.plot(running_mean(np.abs(to_plot), n=100)[100:]);
# Reference line at 1% relative change
plt.plot([0,900],[0.01, 0.01], linestyle='--')
# plt.ylim(0, 0.2)
plt.yscale('log')
    
    


In [None]:
# Ratio of gradient to weight (scaled by lr below).
# NOTE(review): tr_dW_out is only created when the commented-out dW traces in
# the training loop and in the trace-stacking cell are re-enabled; as the
# notebook stands this cell raises NameError.
# NOTE(review): the [-1,12,1] shape presumably stems from a 12x1 output layer - confirm
# before re-enabling.
aa_W_out = tr_W_out.reshape([-1,12,1])
bb_dW_out = tr_dW_out.reshape([-1,12,1])
print('aa_W_out', aa_W_out.shape)
print('bb_dW_out', bb_dW_out.shape)
relat_out = (bb_dW_out / aa_W_out)
print('relat_out', relat_out.shape)
# Last (up to) 1000 steps, column 0
to_plot = relat_out[-1000:,:,0]
print('to_plot', to_plot.shape)

plt.plot(running_mean(lr*to_plot, n=100)[100:]);


#plt.plot(running_mean(bb_dW_out / aa_W_out, n=100)[-1000:,:,0], alpha=1)
# plt.ylim(0, 0.2)
#plt.yscale('log')

In [None]:
bb_dW_out.shape

# Weights

In [None]:
tr_z_hid.shape

Zoom in one neuron

In [None]:
fig = plt.figure(figsize=[16,9])

# tr_z_hid is unpacked elsewhere as (epoch, batch, sample, neuron), but
# plot_3d_histogram needs [epochs, samples]; merge batch and sample axes first.
# (Indexing the 4-d trace with [:,:,1] handed a 3-d array to the plot helper.)
ne, ni, na, nn = tr_z_hid.shape
z_hid_by_epoch = tr_z_hid.reshape([ne, ni*na, nn])

ax = fig.add_subplot(111, projection='3d')
plot_3d_histogram(z_hid_by_epoch[:,:,1], funct=lambda x: x, ax=ax)
ax.set_title('Neuron #1')
#ax.set_xscale('log')

plt.tight_layout()
plt.show()

In [None]:
# tr_z_hid is unpacked elsewhere as (epoch, batch, sample, neuron), but
# plot_3d_histogram needs [epochs, samples]; merge batch and sample axes first.
ne, ni, na, nn = tr_z_hid.shape
z_hid_by_epoch = tr_z_hid.reshape([ne, ni*na, nn])

# Size the grid from the number of neurons; the fixed 8x3 grid only had room
# for 24 neurons. The height keeps the former 18 units per 8 rows.
n_cols = 3
n_rows = -(-nn // n_cols)
fig = plt.figure(figsize=[16, 2.25*n_rows])

for n in range(nn):
    ax = fig.add_subplot(n_rows, n_cols, n+1, projection='3d')
    plot_3d_histogram(z_hid_by_epoch[:,:,n], funct=lambda x: x, ax=ax)
    ax.set_title('Neuron #' + str(n))

plt.tight_layout()
plt.show()

In [None]:
# Only the neuron axis (last) is needed; unpacking exactly three dims failed
# for the 4-d trace produced by the training loop.
nn = tr_z_hid.shape[-1]
# One row per traced sample, one column per hidden neuron
tmp = tr_z_hid.reshape([-1,nn])
print(tmp.shape)

In [None]:
plt.plot(running_mean(act_fun(tmp[:,0]), n=1000))
#plt.yscale('log')

In [None]:
plt.plot(running_std(act_fun(tmp[:,0]), n=1000))
#plt.yscale('log')

In [None]:
tr_z_hid.shape

In [None]:
#fig = plt.figure(figsize=[16,18])
#ax = fig.add_subplot(111)


#plt.tight_layout()
#plt.show()

# Per-epoch mean and std of every hidden neuron's activation.
# Collapse everything between the epoch axis (first) and the neuron axis
# (last) into one sample axis; unpacking exactly three dims failed for the
# 4-d trace produced by the training loop.
nn = tr_z_hid.shape[-1]
act_hid_by_epoch = act_fun(tr_z_hid.reshape([tr_z_hid.shape[0], -1, nn]))
plt.plot(np.mean(act_hid_by_epoch, axis=1))
plt.show()
plt.plot(np.std(act_hid_by_epoch, axis=1))
plt.show()

In [None]:
tr_z_hid.shape