In [1]:
import pickle as pkl
import numpy as np
import tensorflow as tf

from pandas import DataFrame, get_dummies

class Dataset(object):
    def __init__(self, features, labels):
        assert features.shape[0] == labels.shape[0], (
        'features.shape: %s labels.shape: %s' % (features.shape,labels.shape))
        self._num_examples = features.shape[0]

        features = features.astype(np.float32)
        self._features = features
        self._labels = labels
        self._epochs_completed = 0
        self._index_in_epoch = 0
    @property
    def features(self):
        return self._features
    @property
    def labels(self):
        return self._labels
    @property
    def num_examples(self):
        return self._num_examples
    @property
    def epochs_completed(self):
        return self._epochs_completed
    def next_batch(self, batch_size):
        start = self._index_in_epoch
        self._index_in_epoch += batch_size
        if self._index_in_epoch > self._num_examples:
          # Finished epoch
          self._epochs_completed += 1
          # Shuffle the data
          perm = np.arange(self._num_examples)
          np.random.shuffle(perm)
          self._features = self._features[perm]
          self._labels = self._labels[perm]
          # Start next epoch
          start = 0
          self._index_in_epoch = batch_size
          assert batch_size <= self._num_examples
        end = self._index_in_epoch
        return self._features[start:end], self._labels[start:end]

In [2]:
FC_dim = 256
PKL_DIR = './'

epoch = 15

def load():
    with open(PKL_DIR + 'train.pkl', 'rb') as input:
        train = pkl.load(input)
        train_x = train['x'].astype('float32')
        train_y = train['y']

    with open(PKL_DIR + 'valid.pkl', 'rb') as input:
        valid = pkl.load(input)
        valid_x = valid['x'].astype('float32')
        valid_y = valid['y']

    with open(PKL_DIR + 'test.pkl', 'rb') as input:
        test_x = pkl.load(input).astype('float32')

    return train_x, train_y, valid_x, valid_y, test_x

In [32]:
def create_weight_variables(shape, names):
    w = tf.Variable(tf.truncated_normal(shape=shape), name=names[0])
    b = tf.Variable(tf.truncated_normal(shape=[shape[1]]), name=names[0])
    
    return w, b
    
def activate(input_layer, weight, bias, activation=None):
    linear_combination = tf.matmul(input_layer, weight) + bias
    
    if activation == None:
        return linear_combination
    elif activation == 'relu':
        return tf.nn.relu(linear_combination)
    elif activation == 'tanh':
        return tf.nn.tanh(linear_combination)
    elif activation == 'sigmoid':
        return tf.nn.sigmoid(linear_combination)
    elif activation == 'softmax':
        return tf.nn.softmax(linear_combination)
    
def FullConnected_layer(input_layer, to_shape, names, activation=None):
    origin_shape = input_layer.get_shape().as_list()[1]
    W, b = create_weight_variables([origin_shape, to_shape], names)
    output_layer = activate(input_layer, W, b, activation)
    return output_layer

In [82]:
def create_highway_weights(shape, name):
    h_W, h_b = create_weight_variables([shape, shape], ['h_W_'+name, 'h_b_'+name])
    t_W, t_b = create_weight_variables([shape, shape], ['t_W_'+name, 't_b_'+name])
    c_W, c_b = create_weight_variables([shape, shape], ['c_W_'+name, 'c_b_'+name])
    
    return [h_W, h_b, t_W, t_b, c_W, c_b]

def highway_activate(input_layer, weights, activations=['sigmoid', 'sigmoid', 'sigmoid']):
    H = activate(input_layer, weights[0], weights[1], activations[0])
    T = activate(input_layer, weights[2], weights[3], activations[1])
    #C = activate(input_layer, weights[4], weights[5], activations[2])
    C = 1 - T
    
    output_layer = tf.multiply(H, T) + tf.multiply(input_layer, C) 
    return output_layer

def Highway_layer(input_layer, name, user_activations=['sigmoid', 'sigmoid', 'sigmoid']):
    shape = input_layer.get_shape().as_list()[1]
    output_layer = highway_activate(input_layer, create_highway_weights(shape, name), activations=user_activations)
    return output_layer

In [42]:
train_x, train_y, valid_x, valid_y, test_x = load()
train_x = train_x.reshape(-1, 28*28)
valid_x = valid_x.reshape(-1, 28*28)
test_x = test_x.reshape(-1, 28*28)

In [85]:
x = tf.placeholder(tf.float32, shape=(None, 28*28))
y = tf.placeholder(tf.float32, shape=(None, 10))

f = FullConnected_layer(x, 50, ['W_0', 'b_0'])

# 1st layer
highway_1    = Highway_layer(f,         'layer1')
highway_2    = Highway_layer(highway_1, 'layer2')

output_layer = FullConnected_layer(highway_2, 10, ['W_2', 'b_2'], activation='softmax')
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(output_layer, y))
#loss = tf.reduce_sum(output_layer)
optimizer = tf.train.RMSPropOptimizer(0.01).minimize(loss)

In [86]:
dataset = Dataset(train_x, get_dummies(train_y).values)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for _ in xrange(30000):
        #r = np.random.randint(train_x.shape[0])
        #fd = {x: [train_x[r]], y: [get_dummies(train_y).values[r]]}
        x_batch, y_batch = dataset.next_batch(4096)
        fd = {x: x_batch, y: y_batch}
        sess.run(optimizer, feed_dict=fd)
        if (_+1)%1000 == 0:
            print("step: {} loss: {}".format(_+1, sess.run(loss, feed_dict=fd)))

step: 1000 loss: 1.93161690235
step: 2000 loss: 1.79904603958
step: 3000 loss: 1.73361611366
step: 4000 loss: 1.64230585098
step: 5000 loss: 1.63058710098
step: 6000 loss: 1.64889788628
step: 7000 loss: 1.62399530411
step: 8000 loss: 1.62130522728
step: 9000 loss: 1.59957718849
step: 10000 loss: 1.58775126934
step: 11000 loss: 1.58957076073
step: 12000 loss: 1.59713923931
step: 13000 loss: 1.5932328701
step: 14000 loss: 1.58517599106
step: 15000 loss: 1.59274458885
step: 16000 loss: 1.58883833885
step: 17000 loss: 1.59347701073
step: 18000 loss: 1.58712947369
step: 19000 loss: 1.59396529198
step: 20000 loss: 1.59030342102
step: 21000 loss: 1.58493208885
step: 22000 loss: 1.58883833885
step: 23000 loss: 1.58664119244
step: 24000 loss: 1.58468794823
step: 25000 loss: 1.59054720402
step: 26000 loss: 1.59079134464
step: 27000 loss: 1.58688521385
step: 28000 loss: 1.60397541523
step: 29000 loss: 1.59689509869
step: 30000 loss: 1.59347712994


In [65]:
train_x.shape

(33600, 784)