In [1]:
import numpy as np
import tensorflow as tf
import datetime
import matplotlib.pyplot as plt
import pickle

In [2]:
# load the data
data_file = open('images_zero_one.txt', 'rb')
train_data, train_labels, test_data, test_labels = pickle.load(data_file, encoding='latin1')
data_file.close()

train_data = np.reshape(train_data, [-1, 28, 28])
test_data = np.reshape(test_data, [-1, 28, 28])
train_labels = train_labels.astype(np.float32)
test_labels = test_labels.astype(np.float32)

train_data = (train_data - np.mean(train_data)) / np.std(train_data)
test_data = (test_data - np.mean(test_data)) / np.std(test_data)

In [17]:
hm_train, dim1, dim2 = train_data.shape
hm_test = len(test_labels)
hm_classes = 2
batch_size = 2
max_epoch = 1
eval_interval = 10
learning_rate = 0.005
test_acc = np.zeros((0, 1))
train_acc = np.zeros((0, 1))
train_loss = np.zeros((0, 1))

In [4]:
def weight_init(shape):
    initial = tf.truncated_normal(shape, mean=0.1, stddev=0.01) / 10
    return tf.Variable(initial)

def bias_init(shape):
    initial = tf.constant(0.001, shape=shape)
    return tf.Variable(initial)

def conv2d(x, w):
    hm_data, dim1, dim2 = x.get_shape().as_list()
    #print(hm_data, dim1, dim2)
    stride = 1
    filter_size1, filter_size2 = w.get_shape().as_list()
    #print(filter_size1, filter_size2)
    #convoled_im = tf.Variable(tf.ones([int(hm_data), int((dim1 - filter_size1) / stride + 1), int((dim2 - filter_size2) / stride + 1)]))
    convoled_im = tf.zeros([0, 24, 24])
    for im in np.arange(int(hm_data)): 
        slices = tf.zeros([0, 24])
        for i in np.arange(int((dim1 - filter_size1) / stride + 1)):
            row = tf.zeros([1, 0])
            for j in np.arange(int((dim2 - filter_size2) / stride + 1)):
                reception_area = x[im, i:i+filter_size1, j:j+filter_size2]
                #print(reception_area.get_shape().as_list())
                temp = tf.reshape(tf.reduce_sum(tf.reshape(reception_area, [filter_size1, filter_size2]) * w), [1, 1])
                row = tf.concat([row, temp], 1)
                #print(temp.get_shape().as_list())
                #convoled_im[im, i, j].assign(temp)
            slices = tf.concat([slices, row], 0)
        convoled_im = tf.concat([convoled_im, slices[None, :, :]], 0)
    return tf.reshape(convoled_im, [-1, 24, 24])

def avg_pool(x):
    hm_data, dim1, dim2 = x.get_shape().as_list()
    print(hm_data, dim1, dim2)
    stride = 2
    kernel_size = 2
    #pooled_im = tf.Variable(tf.zeros([int(hm_data), (dim1 - kernel_size) / stride + 1, (dim2 - kernel_size) / stride + 1]), trainable=False)
    pooled_im = tf.zeros([0, 12, 12])
    for im in np.arange(hm_data):
        slices = tf.zeros([0, 12])
        for i in np.arange(int((dim1 - kernel_size) / stride + 1)):
            row = tf.zeros([1, 0])
            for j in np.arange(int((dim2 - kernel_size) / stride + 1)):
                reception_area = x[im, i * stride:i * stride + kernel_size, j * stride:j * stride+kernel_size]
                temp = tf.reshape(tf.reduce_mean(reception_area), [1, 1])
                row = tf.concat([row, temp], 1)
                
            slices = tf.concat([slices, row], 0)
        pooled_im = tf.concat([pooled_im, slices[None, :, :]], 0)
            
    return pooled_im

def sigmoid_cross_entropy(y_pred, y):
    return tf.matmul(-tf.transpose(y), tf.log(tf.sigmoid(y_pred))) - tf.matmul(tf.transpose(1-y), tf.log(1 - tf.sigmoid(y_pred)))

def compute_gradient(fc, y_out, w_fc, b_fc, x, y, h_conv1, h_conv2):
    grad_w_fc = tf.matmul(tf.transpose(fc), (1 - tf.sigmoid(y_out)) * -y) - tf.matmul(tf.transpose(fc), (1-y) * tf.sigmoid(y_out))
    grad_b_fc = tf.reshape(tf.reduce_sum(1 - tf.sigmoid(y_out) * -y - (1-y) * tf.sigmoid(y_out)), [1, 1])
    grad_fc = tf.matmul(-y * (1 - tf.sigmoid(y_out)), tf.transpose(w_fc)) - tf.matmul((1-y) * tf.sigmoid(y_out), tf.transpose(w_fc))
    #print(grad_fc.get_shape().as_list())
    #activation1 = tf.zeros([0, 24, 24])
    #activation2 = tf.zeros([0, 24, 24])
    grad_w1 = tf.zeros([5, 5])
    grad_w2 = tf.zeros([5, 5])
    grad_b1 = tf.zeros([24, 24])
    grad_b2 = tf.zeros([24, 24])
    for im in np.arange(batch_size):
        one_im = tf.reshape(grad_fc[im, :], [288, 1])
        # print(one_im.get_shape().as_list())
        pool1 = tf.reshape(one_im[0:144, 0], [12, 12])
        pool2 = tf.reshape(one_im[144:, 0], [12, 12])
        act1 = tf.zeros([0, 24])
        act2 = tf.zeros([0, 24])
        for i in np.arange(12):
            act_r1 = tf.zeros([2, 0])
            act_r2 = tf.zeros([2, 0])
            for j in np.arange(12):
                temp1 = tf.ones([2, 2]) * pool1[i, j] / 4
                temp2 = tf.ones([2, 2]) * pool2[i, j] / 4
                act_r1 = tf.concat([act_r1, temp1], 1)
                act_r2 = tf.concat([act_r2, temp2], 1)
                
            act1 = tf.concat([act1, act_r1], 0)
            act2 = tf.concat([act2, act_r2], 0)
            
        #activation1 = tf.concat([activation1, act1[None, :, :]], 0)
        #activation2 = tf.concat([activation2, act2[None, :, :]], 0)
        print(grad_b1.get_shape().as_list())
        grad_C1 = tf.cast(h_conv1[im, :, :] > 0, tf.float32) * act1[None, :, :]
        grad_C2 = tf.cast(h_conv2[im, :, :] > 0, tf.float32) * act2[None, :, :]
        grad_b1 += tf.reshape(grad_C1, [24, 24])
        grad_b2 += tf.reshape(grad_C2, [24, 24])
        print(grad_C1.get_shape().as_list())
        print(grad_b1.get_shape().as_list())
        f1 = tf.zeros([0, 5])
        f2 = tf.zeros([0, 5])
        for r in np.arange(5):
            t1 = tf.zeros([1, 0])
            t2 = tf.zeros([1, 0])
            for c in np.arange(5):
                t1 = tf.concat([t1, tf.reshape(tf.reduce_sum(x[im, r:r+24, c:c+24] * grad_C1), [1, 1])], 1)
                t2 = tf.concat([t2, tf.reshape(tf.reduce_sum(x[im, r:r+24, c:c+24] * grad_C2), [1, 1])], 1)
            
            f1 = tf.concat([f1, t1], 0)
            f2 = tf.concat([f2, t2], 0)
            
        grad_w1 += f1
        grad_w2 += f2
        
    return grad_w_fc, grad_b_fc, grad_w1, grad_b1, grad_w2, grad_b2

def train_opt(grad_w_fc, grad_b_fc, grad_w1, grad_b1, grad_w2, grad_b2, w_fc, b_fc, w1, b1, w2, b2, rate):
    tf.assign_add(w_fc, -rate * grad_w_fc)
    print(b1.get_shape().as_list())
    print(grad_b1.get_shape().as_list())
    
    tf.assign_add(b_fc, -rate * grad_b_fc[:, 0])
    tf.assign_add(w1, -rate * grad_w1)
    tf.assign_add(b1, -rate * grad_b1)
    tf.assign_add(w2, -rate * grad_w2)
    tf.assign_add(b2, -rate * grad_b2)
    
    return w_fc, b_fc, w1, b1, w2, b2

In [5]:
# define the flow
x = tf.placeholder(dtype=tf.float32, shape=[batch_size, 28, 28])
y = tf.placeholder(dtype=tf.float32, shape=[batch_size, 1])

w1 = weight_init([5, 5])
b1 = bias_init([24, 24])

w2 = weight_init([5, 5])
b2 = bias_init([24, 24])

In [6]:
# Convolutional layer, and pooling with two filters
h_conv1 = tf.nn.relu(conv2d(x, w1) + b1)
h_pool1 = avg_pool(h_conv1)

h_conv2 = tf.nn.relu(conv2d(x, w2) + b2)
h_pool2 = avg_pool(h_conv2)

# fully connected layer
fc = tf.concat([tf.reshape(h_pool1, [-1, 144]), tf.reshape(h_pool2, [-1, 144])], 1)
w_fc = weight_init([144 * 2, 1])
b_fc = bias_init([1])

# output layer
y_out = tf.matmul(fc, w_fc) + b_fc
y_pred = tf.sigmoid(y_out)

2 24 24
2 24 24


In [7]:
# The backward propagation
# define the loss function
# cross_entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_pred, labels=y))
cross_entropy = sigmoid_cross_entropy(y_out, y)
G_w_fc, G_b_fc, G_w1, G_b1, G_w2, G_b2 = compute_gradient(fc, y_out, w_fc, b_fc, x, y, h_conv1, h_conv2)

[24, 24]
[1, 24, 24]
[24, 24]
[24, 24]
[1, 24, 24]
[24, 24]


In [8]:
a = train_opt(G_w_fc, G_b_fc, G_w1, G_b1, G_w2, G_b2, w_fc, b_fc, w1, b1, w2, b2, learning_rate)

correct_prediction = tf.equal(tf.to_float(y_pred >= 0.5), y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

[24, 24]
[24, 24]


In [None]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
    
for epoch in np.arange(max_epoch):
    re_order = np.random.permutation(hm_train)
    
        
    for i in np.arange(int(hm_train / batch_size)):
        batch_x = train_data[re_order[i * batch_size:(i + 1) * batch_size], :, :]
        batch_y = train_labels[re_order[i * batch_size:(i + 1) * batch_size]][:, None]
            
        ce = sess.run(cross_entropy, feed_dict={x: batch_x, y: batch_y})
        train_accuracy = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
        #test_accuracy = sess.run(accuracy, feed_dict={x: test_data, y: test_labels[:, None]})
        train_acc = np.vstack((train_acc, train_accuracy))
        train_loss = np.vstack((train_loss, ce))
        #test_acc = np.vstack((test_acc, test_accuracy))
            
        if i % eval_interval == 0:
            learning_rate *= 0.9
            print("epoch: %d step: %d, training accuracy: %g, loss: %g" %
                            (epoch, i, np.mean(train_acc), np.mean(train_loss)))
                
        
                
        sess.run(a, feed_dict={x: batch_x, y:batch_y})

In [None]:
for i in np.arange(int(1100 / batch_size)):
    batch_x = test_data[i * batch_size:(i + 1) * batch_size, :, :]
    batch_y = test_labels[i * batch_size:(i + 1) * batch_size][:, None]
    test_accuracy = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
    test_acc = np.vstack((train_acc, train_accuracy))
            
print("testing accuracy: %g " % np.mean(test_acc))

In [None]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
    
for epoch in np.arange(1):
    
    batch_x = train_data[0:batch_size, :, :]
    batch_y = train_labels[0:batch_size][:, None]
            
    ce = sess.run(cross_entropy, feed_dict={x: batch_x, y: batch_y})
    Grad_w_fc, _, Grad_w1, _, _, _ = sess.run(compute_gradient(fc, y_out, w_fc, b_fc, x, y, h_conv1, h_conv2), feed_dict={x: batch_x, y: batch_y})
    compare_wfc = sess.run(tf.gradients(cross_entropy, w_fc), feed_dict={x: batch_x, y: batch_y})
    compare_w1 = sess.run(tf.gradients(cross_entropy, w1), feed_dict={x: batch_x, y: batch_y})
    

In [13]:
print(Grad_w_fc)

[[ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [-0.0004151 ]
 [-0.00891668]
 [-0.00930318]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [-0.02336233]
 [-0.13094537]
 [-0.11931626]
 [-0.00896868]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [-0.07830977]
 [-0.2757951 ]
 [-0.23357067]
 [-0.02556819]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [-0.14110583]
 [-0.32920167]
 [-0.2387878 ]
 [-0.02291412]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [-0.00116262]
 [-0.21253577]
 [-0.35589141]
 [-0.20068158]
 [-0.00885565]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [-0.0191385 ]
 [-0.26550195]
 [-0.35935

In [11]:
print(compare_wfc)

[array([[ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [-0.0004151 ],
       [-0.00891668],
       [-0.00930318],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [-0.02336233],
       [-0.13094537],
       [-0.11931626],
       [-0.00896868],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [-0.07830977],
       [-0.2757951 ],
       [-0.23357067],
       [-0.02556819],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [-0.14110583],
       [-0.32920167],
       [-0.2387878 ],
       [-0.02291412],
       [ 

In [14]:
print(Grad_w1)

[[-0.19678211 -0.26824868 -0.28050941 -0.27819446 -0.25289649]
 [-0.23463902 -0.29250535 -0.29847866 -0.29639342 -0.26399124]
 [-0.25376344 -0.29285061 -0.29843727 -0.29766166 -0.25829631]
 [-0.2593267  -0.29288825 -0.29801801 -0.29560554 -0.23564357]
 [-0.25277612 -0.28130531 -0.28598326 -0.2778092  -0.19716889]]


In [15]:
print(compare_w1)

[array([[-0.19678208, -0.26824859, -0.28050935, -0.27819449, -0.25289646],
       [-0.23463903, -0.29250523, -0.29847866, -0.29639348, -0.26399121],
       [-0.2537635 , -0.29285058, -0.29843724, -0.2976616 , -0.25829634],
       [-0.25932673, -0.29288822, -0.29801801, -0.29560551, -0.23564357],
       [-0.25277615, -0.28130528, -0.28598326, -0.2778092 , -0.19716889]], dtype=float32)]
