In [1]:
# Clear the interactive namespace (forced, no confirmation prompt) so the
# notebook starts from a clean state.
%reset -f
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import os
import sys
sys.path.insert(0, '/Users/jeff/Documents/Python/_projects/tdadl/')

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from toy_data import *

In [3]:
# Load the dataset through the project helper (brought in by the
# `from toy_data import *` above). Later cells read `data.inputs`,
# `data.outputs` and call `data.rand_batch(...)`.
data = mnist_data()

# Plot styling: take seaborn's 'Set1' palette and make it the default
# colour cycle for subsequent figures.
cmap = sns.color_palette('Set1')
sns.set_palette(cmap)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [4]:
## Model parameters

# Layer-indexing convention used by every per-layer list in this notebook
# (shown for layers = 8):
#   layer 0      -> input vector space
#   layers 1..7  -> intermediate layers
#   layer 8      -> final layer
layers = 8

# Input / output widths are read off the second axis of the dataset arrays.
m_dim = data.inputs.shape[1]
p_dim = data.outputs.shape[1]

# Width of each layer, indexed by layer number:
# input width, then (layers-1) intermediate layers of 240 units, then output width.
l_dim = [m_dim] + [240] * (layers - 1) + [p_dim]

stddev = 0.01   # NOTE(review): not referenced by any of the cells visible here
b_init = 0.0    # initial value of every bias vector (b and c)
alpha = 1       # step size applied to the loss gradient when forming the top-layer target

batch_size = 100

In [5]:
## Model
tf.reset_default_graph()

# placeholders
x_in = tf.placeholder(tf.float32, shape=[batch_size, m_dim]) # Input
y = tf.placeholder(tf.float32, shape=[batch_size, p_dim]) # Output
epoch = tf.placeholder(tf.float32, shape=None) # training iteration

noise_inj = .1/(1.+epoch/200.) # stddev

# Initialize lists.
b = (layers+1)*[None] 
W = (layers+1)*[None]
x = (layers+1)*[None]

L = (layers+1)*[None]
L_inv = (layers+1)*[None]

x_ = (layers+1)*[None]
V = (layers+1)*[None]
c = (layers+1)*[None]

x_c = (layers+1)*[None]
fx_c = (layers+1)*[None]

train_op_inv = (layers+1)*[None]
train_op = (layers+1)*[None]

def f(ll, zz):
    """Forward map from layer ll-1 to layer ll.

    Applies the affine transform given by W[ll] and b[ll] to `zz`, followed
    by a sigmoid. W and b are the module-level per-layer lists, so they must
    already hold entries for `ll` when this is called.
    """
    pre_activation = tf.matmul(zz, W[ll]) + b[ll]
    return tf.nn.sigmoid(pre_activation, name='f')

def g(ll, zz):
    """Feedback map from layer ll back to layer ll-1.

    Applies the affine transform given by V[ll] and c[ll] to `zz`, followed
    by a sigmoid. V and c are the module-level per-layer lists, so they must
    already hold entries for `ll` when this is called.
    """
    pre_activation = tf.matmul(zz, V[ll]) + c[ll]
    return tf.nn.sigmoid(pre_activation, name='g')

In [6]:
# Forward graph
# Layer 0 is the input placeholder itself; every layer l >= 1 gets its own
# bias/weight variables and is computed from layer l-1 through f.
x[0] = x_in
for l in range(1, layers+1):
    fan_in, fan_out = l_dim[l-1], l_dim[l]
    with tf.name_scope('Layer_Forward'+str(l)):
        b[l] = tf.Variable(tf.constant(b_init, shape=[1, fan_out]), name='b')
        # Truncated-normal init whose stddev is sqrt(6 / (fan_in + fan_out)).
        w_init = tf.truncated_normal([fan_in, fan_out],
                                     stddev=np.sqrt(6./(fan_in+fan_out)))
        W[l] = tf.Variable(w_init, name='W')
        x[l] = f(l, x[l-1])

In [7]:
# Top layer loss / top layer target
# Cross-entropy between y and softmax(x[-1]), summed over axis 1 and then
# averaged over the remaining axis.
class_probs = tf.nn.softmax(x[-1])
per_example_xent = -tf.reduce_sum(y*tf.log(class_probs), reduction_indices=[1])
L[-1] = tf.reduce_mean(per_example_xent)

# Top-layer target: one gradient step of size alpha on the top activations.
# NOTE(review): L[-1] is a mean, so this gradient carries the averaging
# factor; confirm alpha was chosen with that in mind.
top_grad = tf.gradients(L[-1], [x[-1]])[0]
x_[-1] = x[-1] - alpha*top_grad

In [8]:
# Feedback graph
# Walk from the top layer down to layer 2, creating the feedback parameters
# for each layer and propagating the target one layer down.
for l in reversed(range(2, layers+1)):
    below_dim, above_dim = l_dim[l-1], l_dim[l]
    with tf.name_scope('Layer_Feedback'+str(l)):
        c[l] = tf.Variable(tf.constant(b_init, shape=[1, below_dim]), name='c')
        v_init = tf.truncated_normal([above_dim, below_dim],
                                     stddev=np.sqrt(6./(below_dim+above_dim)))
        V[l] = tf.Variable(v_init, name='V')
        # Target for layer l-1: its activation, corrected by the difference
        # between g applied to the layer-l target and g applied to the
        # layer-l activation.
        g_of_activation = g(l, x[l])
        g_of_target = g(l, x_[l])
        x_[l-1] = x[l-1] - g_of_activation + g_of_target

In [9]:
# Corrupted targets
# For every intermediate layer l, build a noise-corrupted copy of its
# activation (x_c[l]) and push it through the forward map to layer l+1
# (fx_c[l+1]). Both are wrapped in stop_gradient so they act as constants
# in the inverse losses that consume them.
for l in range(1, layers):
    # Sample noise with the full [batch_size, dim] shape so that every example
    # in the batch gets its own perturbation. Sampling a [1, dim] tensor and
    # passing x[l] as `mean` would broadcast a single noise row across the
    # whole batch, i.e. all examples would share the same corruption.
    noise = tf.random_normal([batch_size, l_dim[l]], stddev=noise_inj)
    x_c[l] = tf.stop_gradient(x[l] + noise, name='x_c')
    fx_c[l+1] = tf.stop_gradient(f(l+1, x_c[l]), name='fx_c')

In [10]:
# Loss functions
# Forward losses (layers 1..layers-1): half the mean squared distance between
# f applied to the previous layer's activation and this layer's target.
# Both the input activation and the target are held constant via stop_gradient.
for l in range(1, layers):
    layer_output = f(l, tf.stop_gradient(x[l-1]))
    layer_target = tf.stop_gradient(x_[l])
    L[l] = tf.reduce_mean(0.5*(layer_output - layer_target)**2, name='L')

# Inverse losses (layers 2..layers): half the mean squared distance between
# g applied to the forward image of the corrupted activation and the
# corrupted activation itself.
for i in range(2, layers+1):
    recovered = g(i, fx_c[i])
    L_inv[i] = tf.reduce_mean(0.5*(recovered - x_c[i-1])**2, name='L_inv')

In [11]:
# Optimizers
# NOTE(review): every training op below is built from this single Adam
# instance, including the backprop reference op that touches the same W/b
# variables as the layer-local ops. Confirm sharing one optimizer is intended.
opt = tf.train.AdamOptimizer(0.001)

# Layer-local forward updates: each L[l] only trains that layer's W and b.
for l in range(1, layers+1):
    forward_params = [W[l], b[l]]
    train_op[l] = opt.minimize(L[l], var_list=forward_params)

# Layer-local feedback updates: each L_inv[l] only trains that layer's V and c.
for l in range(2, layers+1):
    feedback_params = [V[l], c[l]]
    train_op_inv[l] = opt.minimize(L_inv[l], var_list=feedback_params)

# Backprop. for reference: minimise the top-layer loss w.r.t. all forward
# weights and biases at once.
bp_params = [param for param in W+b if param is not None]
train_bp = opt.minimize(L[-1], var_list=bp_params)

In [12]:
# Accuracy: fraction of rows whose arg-max over softmax(x[-1]) equals the
# arg-max over y (both taken along axis 1).
predicted_class = tf.argmax(tf.nn.softmax(x[-1]), 1)
labelled_class = tf.argmax(y, 1)
correct_prediction = tf.equal(predicted_class, labelled_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [13]:
# clean up
# Drop the unused (None) slots so both lists can be handed straight to
# sess.run. After this the lists are no longer indexed by layer number.
train_op = [step for step in train_op if step is not None]
train_op_inv = [step for step in train_op_inv if step is not None]

In [14]:
# Tensorboard
# One scalar summary per defined loss, tagged with its layer index.
# L and L_inv both hold layers+1 slots, so zipping them visits every index.
for l, (forward_loss, inverse_loss) in enumerate(zip(L, L_inv)):
    if forward_loss is not None:
        tf.scalar_summary('L'+str(l), forward_loss)
    if inverse_loss is not None:
        tf.scalar_summary('L_inv'+str(l), inverse_loss)
tf.scalar_summary('accuracy', accuracy)

# One histogram per variable currently in the graph, tagged with its name.
for var in tf.all_variables():
    tf.histogram_summary(var.name, var)

# Single op that evaluates every summary registered above.
merged_summary_op = tf.merge_all_summaries()

In [15]:
# Run counter used to name the TensorBoard log directory. The training cell
# increments it before use, so re-running that cell writes to a new directory.
run = 2

In [16]:
# Fresh session with freshly initialised variables.
sess = tf.Session()
sess.run(tf.initialize_all_variables())

# Bump the run counter so this execution logs to its own directory.
run+=1

summary_writer = tf.train.SummaryWriter('/tmp/targ-prop/'+str(run), sess.graph)

try:
    for i in range(100000):
        x_batch, y_batch = data.rand_batch(batch_size)
        # The iteration number is fed as `epoch`, which drives the noise decay.
        feed_dict={x_in: x_batch, y: y_batch, epoch: i}
        # Update the feedback parameters first, then the forward parameters,
        # both on the same batch.
        sess.run(train_op_inv, feed_dict=feed_dict)
        sess.run(train_op, feed_dict=feed_dict)

        if i % 10 == 0:
            # Evaluate on the batch just trained on and log to TensorBoard.
            loss_val, summary_str, acc_val = sess.run([L[-1], merged_summary_op, accuracy], feed_dict=feed_dict)
            summary_writer.add_summary(summary_str, i)

            # Every multiple of 100 is also a multiple of 10, so the values
            # printed here are always the ones computed just above.
            if i % 100 == 0:
                # Single-argument print: same output under Python 2 and 3.
                print("iter: %04d loss: %.4f accuracy: %.4f" % (i, loss_val, acc_val))
finally:
    # Make sure buffered summaries reach disk even if the loop is interrupted.
    summary_writer.flush()

print("finished")

iter: 0000 loss: 2.3148 accuracy: 0.1700
iter: 0100 loss: 2.3106 accuracy: 0.0900
iter: 0200 loss: 2.3006 accuracy: 0.1100
iter: 0300 loss: 2.3022 accuracy: 0.0800
iter: 0400 loss: 2.2959 accuracy: 0.1600
iter: 0500 loss: 2.3042 accuracy: 0.1000
iter: 0600 loss: 2.3114 accuracy: 0.0500
iter: 0700 loss: 2.3024 accuracy: 0.1300
iter: 0800 loss: 2.3047 accuracy: 0.0900
iter: 0900 loss: 2.2974 accuracy: 0.1600
iter: 1000 loss: 2.3017 accuracy: 0.1100
iter: 1100 loss: 2.3032 accuracy: 0.1200
iter: 1200 loss: 2.3005 accuracy: 0.1300
iter: 1300 loss: 2.3100 accuracy: 0.0600
iter: 1400 loss: 2.3024 accuracy: 0.1100
iter: 1500 loss: 2.2994 accuracy: 0.1100
iter: 1600 loss: 2.3025 accuracy: 0.1000
iter: 1700 loss: 2.2984 accuracy: 0.1400
iter: 1800 loss: 2.3040 accuracy: 0.0900
iter: 1900 loss: 2.2984 accuracy: 0.1300
iter: 2000 loss: 2.2989 accuracy: 0.1300
iter: 2100 loss: 2.2975 accuracy: 0.1300
iter: 2200 loss: 2.3030 accuracy: 0.1200
iter: 2300 loss: 2.2994 accuracy: 0.1700
iter: 2400 loss:

In [17]:
   
# # Global loss
# #L_g = tf.reduce_mean(0.5*(x[-1] - y)**2, name="global_loss")
# with tf.variable_scope('global_loss'):
#     L_g = tf.reduce_mean(-tf.reduce_sum(y*tf.log(tf.nn.softmax(x[-1])), reduction_indices=[1]))

# # Top-layer targets
# L_grads = tf.gradients(L_g, [x[-1]])

# x_tar[-1] = x[-1] - alpha*L_grads[0]
# #x_tar[-2] = x[-2] - alpha*L_grads[1]

# # Target graph
# for l in range(layers,0,-1): # from M down to 1 (feedback params V, c are only created for l > 1)
#     with tf.name_scope('layer_target'+str(l)) as scope:
#         if x_tar[l] is None:
#             x_tar[l] = x[l] + gx_tar[l+1] - gx[l+1] # gx[l+1] must be defined of course...
#         if l > 1:
#             c[l] = tf.Variable(tf.constant(b_init, shape=[1, l_dim[l-1]]), name='c')
#             V[l] = tf.Variable(tf.truncated_normal([l_dim[l], l_dim[l-1]], stddev=np.sqrt(6./(l_dim[l-1]+l_dim[l]))), name='V')
#             gx[l] = tf.nn.sigmoid(tf.add(tf.matmul(x[l], V[l]), c[l]), name='g_x')

#             gx_tar[l] = tf.nn.sigmoid(tf.add(tf.matmul(x_tar[l], V[l]), c[l]), name='g_x_tar')










# # Optimizers
# opt = tf.train.AdamOptimizer(0.001)

# train_op_inv = (layers+1)*[None]
# train_op = (layers+1)*[None]




# fx_ = (layers+1)*[None] # f(x + eps)
# gx_ = (layers+1)*[None] # g(f(x + eps))

# for l in range(2, layers+1):
#     with tf.name_scope('L_inv_layer'+str(l)) as scope:
#         fx_[l] = tf.nn.sigmoid(tf.matmul(x[l-1], W[l]) + b[l] + tf.random_normal(b[l].get_shape(), stddev=noise_inj))
#         gx_[l] = tf.nn.sigmoid(tf.matmul(fx_[l], V[l]) + c[l])
#         with tf.name_scope('L_inv'):
#             L_inv[l] = tf.reduce_mean(0.5*(gx_[l] - (x[l-1] + tf.random_normal(c[l].get_shape(), stddev=noise_inj)))**2,
#                                       name='L_inv')
#         train_op_inv[l] = opt.minimize(L_inv[l], var_list=[V[l], c[l]])

# for l in range(1, layers+1):
#     with tf.name_scope('L_layer'+str(l)) as scope:
#         with tf.name_scope('L'):
#             L[l] = tf.reduce_mean(0.5*(x[l] - x_tar[l])**2, name="L")
#         train_op[l] = opt.minimize(L[l], var_list=[W[l], b[l]])