In [None]:
from autograd import numpy as np
from autograd import grad
import numpy.random as npr

import matplotlib.pyplot as plt

import sklearn.datasets as datasets

In [None]:
def log_loss(y_tgt, pred, eps=1e-12):
    """Binary cross-entropy, summed over classes and averaged over samples.

    Args:
        y_tgt: 2-D array of targets, one row per sample, one column per class.
        pred: 2-D array of predicted probabilities with at least as many
            columns as `y_tgt`; only the first `y_tgt.shape[1]` columns are read.
        eps: predictions are clipped to [eps, 1 - eps] before the logarithm,
            so a probability of exactly 0 or 1 yields a large finite loss
            instead of inf/nan.

    Returns:
        Scalar: the cross-entropy summed over every entry and divided by the
        number of rows of `y_tgt`.
    """
    num_samples = y_tgt.shape[0]
    num_classes = y_tgt.shape[1]

    # Keep the probabilities strictly inside (0, 1) so both logs stay finite.
    prob = np.clip(pred[:, :num_classes], eps, 1. - eps)

    cross_entropy = y_tgt * np.log(prob) + (1. - y_tgt) * np.log(1. - prob)

    return -1. / num_samples * np.sum(cross_entropy)

def mse_loss(y_tgt, pred):
    """Mean squared error between `y_tgt` and `pred`, averaged over every element."""
    residual = y_tgt - pred
    return np.mean(residual ** 2)


def mae_loss(y_tgt, pred):
    """Mean absolute error between `y_tgt` and `pred`, averaged over every element."""
    abs_error = np.abs(y_tgt - pred)
    return np.mean(abs_error)
    
def get_log_loss(nn_fn, x, weights, acts, y_tgt):
    """Evaluate `nn_fn(x, weights, acts)` and score the result with `log_loss`.

    Args:
        nn_fn: callable taking `(x, weights, acts)` and returning predictions.
        x: network input, passed straight to `nn_fn`.
        weights: network weights, passed straight to `nn_fn`.
        acts: activations, passed straight to `nn_fn`.
        y_tgt: targets handed to `log_loss`.

    Returns:
        The value of `log_loss(y_tgt, prediction)`.
    """
    return log_loss(y_tgt, nn_fn(x, weights, acts))

    
def get_mse_loss(nn_fn, x, weights, acts, y_tgt):
    """Evaluate `nn_fn(x, weights, acts)` and score the result with `mse_loss`.

    Args:
        nn_fn: callable taking `(x, weights, acts)` and returning predictions.
        x: network input, passed straight to `nn_fn`.
        weights: network weights, passed straight to `nn_fn`.
        acts: activations, passed straight to `nn_fn`.
        y_tgt: targets handed to `mse_loss`.

    Returns:
        The value of `mse_loss(y_tgt, prediction)`.
    """
    return mse_loss(y_tgt, nn_fn(x, weights, acts))

def get_accuracy(y, pred):
    """Fraction of rows whose arg-max prediction matches the arg-max target.

    Args:
        y: 2-D target array (one row per sample, one column per class).
        pred: 2-D prediction array; only its first `y.shape[1]` columns are
            considered.

    Returns:
        Mean of the per-row match indicator.
    """
    num_classes = y.shape[1]

    true_labels = np.argmax(y, axis=1)
    predicted_labels = np.argmax(pred[:, :num_classes], axis=1)

    return np.mean(true_labels == predicted_labels)

# Differentiate each loss with respect to `weights`, which is positional
# argument 2 of get_log_loss / get_mse_loss.  grad's default (argnum=0) would
# target `nn_fn`, which is a function rather than an array of parameters.
gradloss = grad(get_log_loss, argnum=2)
grad_mse_loss = grad(get_mse_loss, argnum=2)

def nn_pass(x, weights, acts, forward=True):
    """Dense feed-forward pass followed by a masked final activation.

    Every layer computes `act(x @ w)`.  The result is then multiplied by a
    mask that keeps only the first 10 output columns, and `acts[-1]` is
    applied once more to the masked values (so the last activation runs
    twice: once inside the layer loop and once after masking).

    Args:
        x: 2-D input array (samples x features).
        weights: list of weight matrices, applied in order.
        acts: list of activation callables, one per weight matrix.
        forward: accepted but not used by this function.

    Returns:
        The array produced by `acts[-1]` on the masked network output.
    """
    assert len(weights) == len(acts)

    # 1.0 in the first 10 columns, 0.0 in the rest.
    output_mask = np.zeros((1, weights[-1].shape[1]))
    output_mask[:, :10] = 1.0

    hidden = x
    for layer_weights, layer_act in zip(weights, acts):
        hidden = layer_act(np.matmul(hidden, layer_weights))

    return acts[-1](hidden * output_mask)
        

def bnn_pass(x, weights, act, forward=True, regression=True):
    """Run an additive-coupling ("bijective") network forwards or backwards.

    The columns of `x` are split into two halves `v1 | v2`.  Layer `ii`
    leaves `v1` untouched and updates `v2 <- v2 + act[ii](v1 @ weights[ii])`.
    The two halves trade places after every odd-numbered layer (ii = 1, 3,
    ...), except after the very last layer, whose output is returned as
    `v1 | v2` without a further swap.

    With `forward=False` the same layers are undone last-to-first (each
    update is subtracted and each swap reverted), so
    `bnn_pass(bnn_pass(x, w, a), w, a, forward=False)` reproduces `x` up to
    floating-point error when `regression` is True.

    Args:
        x: 2-D input array (samples x features).
        weights: list of weight matrices, one per coupling layer.
        act: list of activation callables, one per weight matrix.  The list
            is only read, never reordered or modified.
        forward: True for the forward pass, False for the inverse pass.
        regression: when False, the result is multiplied by a mask that keeps
            the first 10 columns and then passed through `act[-1]`; that
            final step is not invertible.

    Returns:
        2-D array holding the two halves concatenated along axis 1.
    """
    assert len(weights) == len(act)

    num_layers = len(weights)
    halfsies = x.shape[1] // 2

    # Only consumed when `regression` is False: 1.0 in the first 10 columns.
    output_mask = np.zeros((1, 2 * weights[-1].shape[1]))
    output_mask[:, :10] += 1.0

    # swap_before[ii] is True when the halves trade places between layer
    # ii - 1 and layer ii, i.e. after every odd-numbered layer.
    swap_before = [ii > 0 and ii % 2 == 0 for ii in range(num_layers)]

    v1 = x[:, :halfsies]
    v2 = x[:, halfsies:]

    if forward:
        for ii in range(num_layers):
            if swap_before[ii]:
                v1, v2 = v2, v1
            v2 = v2 + act[ii](np.matmul(v1, weights[ii]))
    else:
        # Undo the layers in reverse order: subtract the update that layer ii
        # added, then revert the swap that preceded it.
        for ii in reversed(range(num_layers)):
            v2 = v2 - act[ii](np.matmul(v1, weights[ii]))
            if swap_before[ii]:
                v1, v2 = v2, v1

    v = np.concatenate((v1, v2), axis=1)

    # final layer is not bijective
    if not regression:
        v = v * output_mask
        v = act[-1](v)

    return v

def softmax(x):
    """Row-wise softmax of a 2-D array.

    Each row is shifted by its own maximum (plus a constant 1e-5) before
    exponentiation, then normalised so the row sums to one.
    """
    shifted = x - np.max(x, axis=1, keepdims=True) + 1.e-5
    exponentials = np.exp(shifted)
    return exponentials / np.sum(exponentials, axis=1, keepdims=True)


def get_one_hot(tgt_y):
    """One-hot encode an array of class labels.

    Args:
        tgt_y: array of labels, one per sample.

    Returns:
        Float array with one row per sample and `max(tgt_y) + 1` columns,
        holding 1.0 where the column index equals the label and 0.0 elsewhere.
    """
    num_samples = np.shape(tgt_y)[0]
    num_classes = np.max(1 + tgt_y)

    # Compare every label (as a column vector) against every class index.
    labels = np.reshape(tgt_y, (num_samples, 1))
    class_ids = np.array(range(num_classes))

    return np.where(labels == class_ids, 1.0, 0.0)
    
def leaky_relu(x):
    """Leaky ReLU: `x` for positive inputs, `0.1 * x` otherwise.

    Uses the same formula as the `leaky_relu` defined further down with the
    other activation helpers, so the name means the same thing wherever it
    is bound.
    """
    return x * (x > 0.0) + 0.1 * x * (x <= 0.0)
    

def adam_update(l_grad, l_m=None, l_v=None):
    """Compute one Adam-style step for a list of gradient arrays.

    For each gradient `g` with running moments `m` and `v`:
        m <- 0.9   * m + 0.1   * g
        v <- 0.999 * v + 0.001 * g**2
        update = m / (sqrt(v) + 1e-7)
    No bias correction is applied to the moments.

    Args:
        l_grad: list of gradient arrays for the current batch.
        l_m: list of running first-moment averages, or None to start from zeros.
        l_v: list of running second-moment averages, or None to start from zeros.

    Returns:
        Tuple `(l_update, l_mt1, l_vt1)`: the per-parameter updates and the
        new first and second moments, each as a list.
    """
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-7

    if l_m is None:
        l_m = [g * 0.0 for g in l_grad]
    if l_v is None:
        l_v = [g * 0.0 for g in l_grad]

    # Advance both moments together so all three outputs share one length.
    moments = [(beta1 * m + (1-beta1) * g, beta2 * v + (1-beta2) * g**2)
               for g, m, v in zip(l_grad, l_m, l_v)]

    l_mt1 = [first for first, _ in moments]
    l_vt1 = [second for _, second in moments]
    l_update = [first / (np.sqrt(second) + epsilon) for first, second in moments]

    return l_update, l_mt1, l_vt1

def relu(x):
    """Rectified linear unit: `x` where `x > 0`, zero elsewhere."""
    is_positive = x > 0.0
    return x * is_positive

def leaky_relu(x):
    """Leaky ReLU: `x` for positive inputs, `0.1 * x` otherwise."""
    positive_part = x * (x > 0.0)
    negative_part = 0.1 * x * (x <= 0.0)
    return positive_part + negative_part

def prelu(x,a=0.01):
    """Parametric ReLU: `x` for positive inputs, `a * x` otherwise."""
    positive_part = x * (x > 0.0)
    negative_part = a * x * (x <= 0.0)
    return positive_part + negative_part

def sigmoid(x):
    """Logistic function `1 / (1 + exp(-x))`."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def swish(x):
    """Swish activation: the input multiplied by `sigmoid` of the input."""
    gate = sigmoid(x)
    return x * gate

def elu(x, a = 0.1):
    """Exponential linear unit: `x` for positive inputs, `a * (exp(x) - 1)` otherwise.

    Both branches are evaluated for every element and combined with boolean
    masks, so `np.exp(x)` is computed for positive inputs as well.
    """
    positive_part = x * (x > 0.0)
    negative_part = a * (np.exp(x)-1) * (x <= 0.0)
    return positive_part + negative_part
    

def sinc(x):
    """Unnormalised sinc of `16 * x`: `sin(16 x) / (16 x)`.

    At `x == 0` the quotient is 0/0; the limit value 1.0 is returned there
    instead of nan.

    Args:
        x: array of sample positions.

    Returns:
        Array of the same shape as `x`.
    """
    scaled = 16 * x
    at_origin = scaled == 0

    # Swap in a harmless denominator at the origin so 0/0 is never evaluated.
    safe = np.where(at_origin, 1.0, scaled)

    return np.where(at_origin, 1.0, np.sin(safe) / safe)

In [None]:
def bnn_pass(x, weights, act, forward=True, regression=True):
    """Run an additive-coupling ("bijective") network forwards or backwards.

    The columns of `x` are split into two halves `v1 | v2`.  Layer `ii`
    leaves `v1` untouched and updates `v2 <- v2 + act[ii](v1 @ weights[ii])`.
    The two halves trade places after every odd-numbered layer (ii = 1, 3,
    ...), except after the very last layer, whose output is returned as
    `v1 | v2` without a further swap.

    With `forward=False` the same layers are undone last-to-first (each
    update is subtracted and each swap reverted), so
    `bnn_pass(bnn_pass(x, w, a), w, a, forward=False)` reproduces `x` up to
    floating-point error when `regression` is True.

    Args:
        x: 2-D input array (samples x features).
        weights: list of weight matrices, one per coupling layer.
        act: list of activation callables, one per weight matrix.  The list
            is only read, never reordered or modified.
        forward: True for the forward pass, False for the inverse pass.
        regression: when False, the result is multiplied by a mask that keeps
            the first 10 columns and then passed through `act[-1]`; that
            final step is not invertible.

    Returns:
        2-D array holding the two halves concatenated along axis 1.
    """
    assert len(weights) == len(act)

    num_layers = len(weights)
    halfsies = x.shape[1] // 2

    # Only consumed when `regression` is False: 1.0 in the first 10 columns.
    output_mask = np.zeros((1, 2 * weights[-1].shape[1]))
    output_mask[:, :10] += 1.0

    # swap_before[ii] is True when the halves trade places between layer
    # ii - 1 and layer ii, i.e. after every odd-numbered layer.
    swap_before = [ii > 0 and ii % 2 == 0 for ii in range(num_layers)]

    v1 = x[:, :halfsies]
    v2 = x[:, halfsies:]

    if forward:
        for ii in range(num_layers):
            if swap_before[ii]:
                v1, v2 = v2, v1
            v2 = v2 + act[ii](np.matmul(v1, weights[ii]))
    else:
        # Undo the layers in reverse order: subtract the update that layer ii
        # added, then revert the swap that preceded it.
        for ii in reversed(range(num_layers)):
            v2 = v2 - act[ii](np.matmul(v1, weights[ii]))
            if swap_before[ii]:
                v1, v2 = v2, v1

    v = np.concatenate((v1, v2), axis=1)

    # final layer is not bijective
    if not regression:
        v = v * output_mask
        v = act[-1](v)

    return v


wbnn, acts = [], []

x = np.random.random((1,64))

# Grow the network one tanh coupling layer at a time.  After each addition,
# run a forward pass followed by a reverse pass and plot two residuals:
# input minus reconstructed input, and input minus forward output.
for layers in range(6):
    wbnn.append(npr.randn(32,32))
    acts.append(np.tanh)
    pred = bnn_pass(x, wbnn, acts, forward=True)
    rpred = bnn_pass(pred, wbnn, acts, forward=False)

    for residual, title in ((x[0]-rpred[0], "{} layers input - rpred"),
                            (x[0]-pred[0], "{} layers input - output")):
        plt.figure()
        plt.plot(residual)
        plt.title(title.format(1+layers))
        plt.show()




In [None]:
# 64 sample points on [-1, 1], stored as a single row.
x = np.linspace(-1,1,64)[None, :]
y = sinc(x)

plt.figure()
plt.plot(x[0], y[0])
plt.show()

# Displayed as the cell output.
x.shape

In [None]:
x = np.linspace(-1,1,64).reshape(1,64)
y = sinc(2*x)

wbnn = [npr.randn(32,32)/32**2, npr.randn(32,32)/(32*32)]#, npr.randn(32,32)/(32*32)]
acts = [relu, relu]#, np.tanh]

# The network is fed y and scored against y, i.e. it is asked to reproduce
# its own input.
loss = get_mse_loss(bnn_pass, y, wbnn, acts, y)
# `epoch` does not exist yet at this point; report the loss itself.
print("loss = {:.2f}".format(loss))

pred = bnn_pass(y, wbnn, acts)
predr = bnn_pass(pred, wbnn, acts, forward=False)
plt.figure()
plt.plot(x[0], y[0], label="target")
plt.plot(x[0], pred[0], label="forward pred.")
plt.plot(x[0], predr[0], label="reverse pred.")
plt.legend()
plt.show()

# Gradient with respect to the weights (positional argument 2).
grad_mse_loss = grad(get_mse_loss, argnum=2)
m, v = None, None

lr = 3e-9
l2_reg = 1e-3
l1_reg = 1e-3

for epoch in range(10):
    if epoch % 1 == 0:
        lr = lr * 0.9
        pred = bnn_pass(y, wbnn, acts)
        loss = mse_loss(y, pred)

        print("step {} loss = {:.2f}".format(epoch, loss))

        # NOTE(review): this "validation" loss is evaluated on the same
        # (y, y) pair as the training loss above, and the prediction computed
        # from x on the next line is not used -- confirm what was intended.
        pred = bnn_pass(x, wbnn, acts)
        loss = get_mse_loss(bnn_pass, y, wbnn, acts, y)

        print("step {} validation loss = {:.2f}".format(epoch, loss) )
        print(lr)

    grads = grad_mse_loss(bnn_pass, y, wbnn, acts, y)

    update, m, v = adam_update(grads, m, v)

    if epoch > 500:

        l2_reg = 1e-9
        l1_reg = 1e-9

    for my_grad, my_params in zip(update, wbnn):

        # Gradient of l1_reg * |w| + l2_reg * w**2: both terms must follow the
        # sign of the weight so that the penalty shrinks it towards zero.
        my_params -= lr * (my_grad + l1_reg * np.sign(my_params) + 2.0 * l2_reg * my_params)

pred = bnn_pass(y, wbnn, acts)
predr = bnn_pass(pred, wbnn, acts, forward=False)
plt.figure()
plt.plot(x[0], y[0], label="target")
plt.plot(x[0], pred[0], label="forward pred.")
plt.plot(x[0], predr[0], label="reverse pred.")
plt.legend()
plt.show()

In [None]:
# Overlay the target curve and the latest forward prediction on one figure.
plt.figure()
for curve in (y, pred):
    plt.plot(x[0], curve[0])
plt.show()

In [None]:
# Feed one random gradient (repeated three times) through adam_update,
# continuing from the m and v left by the previous training cell.
my_grad = [npr.randn(32,32)] * 3

g, m, v = adam_update(my_grad, m, v)

# Side by side: the update, first moment and second moment of entry 1.
plt.figure()
for panel, per_param in zip((131, 132, 133), (g, m, v)):
    plt.subplot(panel)
    plt.imshow(per_param[1])
plt.show()

In [None]:
x, y = datasets.load_digits(return_X_y=True)

x /= 255.
y = get_one_hot(y)

# Re-seed before each in-place shuffle so that x and y consume the same
# random stream, which is what keeps images and labels aligned.
my_seed = 13
for samples in (x, y):
    npr.seed(my_seed)
    npr.shuffle(samples)

# First 128 rows: validation; next 128: test; remainder: training.
val_x, val_y = x[:128], y[:128]
test_x, test_y = x[128:256], y[128:256]
train_x, train_y = x[256:], y[256:]


In [None]:
#for epoch in range(100):
lr = 3e-2

# relu, elu, swish and softmax are the activation helpers defined in the
# function cell near the top of the notebook; they are reused here rather than
# being defined a second time.

# One weight matrix per activation: bnn_pass asserts len(weights) == len(act),
# so the two-layer wbnn left over from the sinc experiment cannot be reused.
wbnn = [npr.randn(32,32)/32**2, npr.randn(32,32)/32**2, npr.randn(32,32)/32**2, npr.randn(32,32)/(32*32)]
acts = [relu, elu, swish,  softmax]

# Gradient with respect to the weights (positional argument 2).
gradloss = grad(get_log_loss, argnum=2)
m, v = None, None


l2_reg = 1e-3
l1_reg = 1e-3

# NOTE(review): bnn_pass is called with its default regression=True, so the
# masked act[-1] (softmax) output stage is skipped and the predictions fed to
# log_loss are not normalised -- confirm this is intended for classification.
for epoch in range(1000):

    if epoch % 50 == 0:
        lr = lr * 0.9
        pred = bnn_pass(train_x, wbnn, acts)
        loss = get_log_loss(bnn_pass, train_x, wbnn, acts, train_y)
        accuracy = get_accuracy(train_y, pred[:,:10])

        print("step {} loss = {:.2f} accuracy = {:.2f}".format(epoch, loss, accuracy))

        pred = bnn_pass(val_x, wbnn, acts)
        loss = get_log_loss(bnn_pass, val_x, wbnn, acts, val_y)
        accuracy = get_accuracy(val_y, pred[:,:10])

        print("step {} validation loss = {:.2f} accuracy = {:.2f}".format(epoch, loss, accuracy))
        print(lr)

    grads = gradloss(bnn_pass, train_x, wbnn, acts, train_y)

    update, m, v = adam_update(grads, m, v)

    if epoch > 500:

        l2_reg = 1e-9
        l1_reg = 1e-9

    for my_grad, my_params in zip(update, wbnn):

        # Gradient of l1_reg * |w| + l2_reg * w**2: both terms must follow the
        # sign of the weight so that the penalty shrinks it towards zero.
        my_params -= lr * (my_grad + l1_reg * np.sign(my_params) + 2.0 * l2_reg * my_params)


In [None]:
#for epoch in range(100):
lr = 3e-3

# Four dense 64x64 layers, one per activation.
wnn = [npr.randn(64,64), npr.randn(64,64), npr.randn(64,64), npr.randn(64,64)]
acts = [swish, swish, swish, softmax]

# Gradient with respect to the weights (positional argument 2).
gradloss = grad(get_log_loss, argnum=2)

l2_reg = 1e-9
l1_reg = 1e-9

for epoch in range(1000):

    grads = gradloss(nn_pass, train_x, wnn, acts, train_y)

    if epoch % 100 == 0:
        lr = lr * 0.9
        pred = nn_pass(train_x, wnn, acts)
        loss = get_log_loss(nn_pass, train_x, wnn, acts, train_y)
        accuracy = get_accuracy(train_y, pred[:,:10])

        print("step {} loss = {:.2f} accuracy = {:.2f}".format(epoch, loss, accuracy))

        pred = nn_pass(val_x, wnn, acts)
        loss = get_log_loss(nn_pass, val_x, wnn, acts, val_y)
        accuracy = get_accuracy(val_y, pred[:,:10])

        print("step {} validation loss = {:.2f} accuracy = {:.2f}".format(epoch, loss, accuracy))
        print(lr)

    # Regularisation is switched up from 1e-9 to 1e-3 after 200 steps.
    if epoch > 200:

        l2_reg = 1e-3
        l1_reg = 1e-3

    # Plain gradient descent on the raw gradients (no Adam in this cell).
    for my_grad, my_params in zip(grads, wnn):

        # Gradient of l1_reg * |w| + l2_reg * w**2: both terms must follow the
        # sign of the weight so that the penalty shrinks it towards zero.
        my_params -= lr * (my_grad + l1_reg * np.sign(my_params) + 2.0 * l2_reg * my_params)


In [None]:

# Fresh three-layer network with unscaled random weights, evaluated once on
# the training split without any training.
wbnn = [npr.randn(32,32) for _ in range(3)]
acts = [np.tanh] * 2 + [softmax]

pred = bnn_pass(train_x, wbnn, acts)
loss = get_log_loss(bnn_pass, train_x, wbnn, acts, train_y)
accuracy = get_accuracy(train_y, pred)
print(loss, accuracy)

In [None]:
# First ten outputs for the first training sample.
print(pred[:1, :10])

# Log loss of the current predictions, shown as the cell output.
log_loss(train_y, pred)

In [None]:
# Predicted class, then true class, for the first ten training samples.
for scores in (pred[:10, :10], train_y[:10]):
    print(np.argmax(scores, axis=1))

In [None]:
# Show the first training sample as an 8x8 image, titled with its label.
first_digit = train_x[0].reshape(8,8)
first_label = np.argmax(train_y[0])

plt.figure()
plt.imshow(first_digit)
plt.title("{}".format(first_label))
plt.show()

In [None]:
# Row sums of softmax over the first twelve predictions.
softmax(pred[:12]).sum(axis=1)

In [None]:
x = npr.randn(1,4)

# The same 2x2 matrix is shared by all four coupling layers.
weights = [npr.randn(2,2)] * 4
acts = [np.tanh] * 3 + [softmax]

# bnn_pass builds its own flip pattern; its fourth positional parameter is
# `forward`, so a flips list must not be passed there (combined with
# forward=False it raises "got multiple values for argument 'forward'").
temp = bnn_pass(x, weights, acts)

temp2 = bnn_pass(temp, weights, acts, forward=False)
print(x)
print(temp)
print(temp2)

In [None]:
x = npr.randn(1,4)

# The same 4x4 matrix is shared by all four dense layers.
weights = [npr.randn(4,4)] * 4
acts = [np.tanh] * 3 + [softmax]
flips = [1,0,1,0]

temp = nn_pass(x, weights, acts)

# nn_pass accepts `forward` but does not use it, so this is a second ordinary
# pass rather than an inverse.
temp2 = nn_pass(temp, weights, acts, forward=False)

for stage in (x, temp, temp2):
    print(stage)

In [None]:
# list.reverse() works in place and returns None, so print the list itself.
temp = ["cat", "cat", "cat", "dog"]
temp.reverse()
print(temp)