### Multi Layer Perceptron

In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline  
%config InlineBackend.figure_format='retina'
print ("TF version:[%s]."%(tf.__version__))

TF version:[1.12.0].


### Helper functions

In [2]:
def mlp(x,h_dims=[256,256],actv=tf.nn.relu,out_actv=tf.nn.relu):
    """
    Multi-layer perceptron built from stacked dense layers.

    Every entry of `h_dims` except the last becomes a dense layer using
    `actv`; the last entry is the width of the final dense layer, which
    uses `out_actv`. All kernels share one truncated-normal initializer
    (stddev=0.1). Returns the output tensor of the final layer.
    """
    kernel_init = tf.truncated_normal_initializer(stddev=0.1)
    hidden_units,out_units = h_dims[:-1],h_dims[-1]
    net = x
    for units in hidden_units:
        # Hidden layer: feeds the previous output forward
        net = tf.layers.dense(net,units=units,activation=actv,
                              kernel_initializer=kernel_init)
    # Final layer uses its own activation (may be None)
    return tf.layers.dense(net,units=out_units,activation=out_actv,
                           kernel_initializer=kernel_init)

def placeholder(dim=None):
    """
    Create a float32 placeholder whose leading axis has unspecified size.

    The shape is (None,dim) when `dim` is truthy and (None,) otherwise
    (so both None and 0 yield the one-axis shape).
    """
    if dim:
        ph_shape = (None,dim)
    else:
        ph_shape = (None,)
    return tf.placeholder(dtype=tf.float32,shape=ph_shape)

def placeholders(*args):
    """
    Build one placeholder per given dimension, returned as a list in the
    same order as the arguments.
    Usage: a_ph,b_ph,c_ph = placeholders(adim,bdim,None)
    """
    phs = []
    for dim in args:
        phs.append(placeholder(dim))
    return phs

def get_vars(scope):
    """
    Get TF variables within scope

    Returns a list of the global variables whose name contains `scope`
    as a substring (this is a substring test, not a prefix test).
    """
    # Choose the API namespace by feature detection instead of comparing
    # against one exact version string: use tf.compat.v1 when it exists,
    # otherwise fall back to the top-level tf module. This keeps builds
    # that lack tf.compat.v1 working rather than raising AttributeError.
    compat = getattr(tf,'compat',None)
    v1 = getattr(compat,'v1',tf)
    tf_vars = [x for x in v1.global_variables() if scope in x.name]
    return tf_vars
print ("Ready.")

Ready.


### Dataset

In [3]:
# Load MNIST through the Keras dataset helper (raw pixel values are 0~255)
mnist = tf.keras.datasets.mnist
(x_train,y_train),(x_test,y_test) = mnist.load_data()
# Turn integer labels into one-hot rows by indexing a 10x10 identity matrix
y_train = np.eye(10)[y_train]
y_test = np.eye(10)[y_test]
# Flatten each image to a 784-vector and scale pixel values to [0,1]
x_train = x_train.reshape((-1,784))/255.0
x_test = x_test.reshape((-1,784))/255.0
# Dataset sizes and input dimensionality
n_train,x_dim = x_train.shape
n_test = x_test.shape[0]
print ("n_train:[%d], n_test:[%d], x_dim:[%d]"%
       (n_train,n_test,x_dim))

n_train:[60000], n_test:[10000], x_dim:[784]


### Model

In [11]:
class MultiLayerPerceptronClsClass(object):
    """
    MLP for classification

    On construction this builds the input/label placeholders, an MLP that
    outputs logits, a softmax cross-entropy cost, an Adam training op and
    an accuracy op. `update` runs one optimization step and `test`
    evaluates accuracy, both through a caller-supplied session.
    """
    def __init__(self,name='mlp',x_dim=784,y_dim=10,h_dims=[128,128],actv=tf.nn.relu):
        """
        Initialize

        name:   label stored on the instance and shown in the printed message
        x_dim:  width of the input placeholder
        y_dim:  width of the label placeholder and of the output layer
        h_dims: sequence of hidden layer widths
        actv:   activation used by the hidden layers
        """
        self.name = name
        self.x_dim = x_dim
        self.y_dim = y_dim
        # Store a private list copy: avoids aliasing the shared default list
        # (or the caller's list) and lets tuples be passed, since build_model
        # concatenates this with [y_dim].
        self.h_dims = list(h_dims)
        self.actv = actv
        self.build_model()
        self.build_graph()
        print("[%s] instantiated."%(self.name))
        
    def build_model(self):
        """
        Build model: placeholders plus the MLP that produces logits
        """
        self.ph_x,self.ph_y = placeholders(self.x_dim,self.y_dim)
        # NOTE: the variable scope is the fixed string 'main', not self.name
        with tf.variable_scope('main'):
            # Output layer has y_dim units and no activation (raw logits)
            self.y_hat = mlp(self.ph_x,h_dims=self.h_dims+[self.y_dim],
                             actv=self.actv,out_actv=None)
        self.main_vars = get_vars('main')
        
    def build_graph(self):
        """
        Build graph: cost, optimizer and accuracy ops
        """
        self.costs = tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.ph_y,logits=self.y_hat)
        self.cost = tf.reduce_mean(self.costs) 
        self.optm = tf.train.AdamOptimizer(learning_rate=0.001).minimize(self.cost)
        # A prediction is correct when the argmax of the logits matches the label's
        self.corr = tf.equal(tf.argmax(self.y_hat,1),tf.argmax(self.ph_y,1)) # [N]
        self.accr = tf.reduce_mean(tf.cast(self.corr, "float")) # [1]
        
    def update(self,sess,x_batch,y_batch):
        """
        Update model: run one optimizer step on the batch and return the
        cost value evaluated in the same run
        """
        feeds = {self.ph_x:x_batch,self.ph_y:y_batch}
        cost_val,_ = sess.run([self.cost,self.optm],feed_dict=feeds)
        return cost_val
    
    def test(self,sess,x_test,y_test):
        """
        Test the model: return the accuracy on the given inputs and labels,
        evaluated in a single run
        """
        feeds = {self.ph_x:x_test,self.ph_y:y_test}
        accr_val = sess.run(self.accr,feed_dict=feeds)
        return accr_val
    
print ("Ready.")

Ready.


### Instantiate Model

In [12]:
# Order matters here: clear the default graph first, then open the session,
# then build the model (its constructor creates the ops and variables).
tf.reset_default_graph()
sess = tf.Session() 
M = MultiLayerPerceptronClsClass()

[mlp] instantiated.


In [13]:
# List each variable collected by the model together with its index
for v_idx in range(len(M.main_vars)):
    var = M.main_vars[v_idx]
    print (v_idx,var)

0 <tf.Variable 'main/dense/kernel:0' shape=(784, 128) dtype=float32_ref>
1 <tf.Variable 'main/dense/bias:0' shape=(128,) dtype=float32_ref>
2 <tf.Variable 'main/dense_1/kernel:0' shape=(128, 128) dtype=float32_ref>
3 <tf.Variable 'main/dense_1/bias:0' shape=(128,) dtype=float32_ref>
4 <tf.Variable 'main/dense_2/kernel:0' shape=(128, 10) dtype=float32_ref>
5 <tf.Variable 'main/dense_2/bias:0' shape=(10,) dtype=float32_ref>


### Training loop

In [20]:
sess.run(tf.global_variables_initializer()) # Initialize variables
max_epoch,batch_size,print_every = 10,128,1
# Integer ceiling division: the last, possibly smaller, batch is included.
# Pure-int arithmetic avoids the `np.int` alias, which newer NumPy releases
# no longer provide.
max_iter = (n_train+batch_size-1)//batch_size # number of iterations
for epoch in range(max_epoch):
    p_idx = np.random.permutation(n_train) # fresh sample order every epoch
    cost_val_sum,cnt = 0,0
    for it in range(max_iter):
        b_idx = p_idx[batch_size*(it):batch_size*(it+1)]
        x_batch,y_batch = x_train[b_idx,:],y_train[b_idx,:]
        cost_val = M.update(sess=sess,x_batch=x_batch,y_batch=y_batch)
        # Weight by the actual batch size so a short last batch is averaged correctly
        cost_val_sum += cost_val*len(b_idx)
        cnt += len(b_idx)
    cost_val_avg = cost_val_sum / cnt
    # Report on every print_every-th epoch and always on the final epoch
    if ((epoch%print_every)==0) or (epoch==(max_epoch-1)):
        accr_val = M.test(sess,x_test,y_test)
        print ("epoch:[%d/%d] cost:[%.3f] test_accuracy:[%.3f]"%
               (epoch,max_epoch,cost_val_avg,accr_val))
print ("Done.")

epoch:[0/10] cost:[0.342] test_accuracy:[0.954]
epoch:[1/10] cost:[0.125] test_accuracy:[0.967]
epoch:[2/10] cost:[0.085] test_accuracy:[0.973]
epoch:[3/10] cost:[0.064] test_accuracy:[0.973]
epoch:[4/10] cost:[0.051] test_accuracy:[0.972]
epoch:[5/10] cost:[0.040] test_accuracy:[0.975]
epoch:[6/10] cost:[0.032] test_accuracy:[0.976]
epoch:[7/10] cost:[0.027] test_accuracy:[0.975]
epoch:[8/10] cost:[0.021] test_accuracy:[0.977]
epoch:[9/10] cost:[0.017] test_accuracy:[0.976]
Done.
