# Train MLP for Regression 

In [1]:
import glob,os
import numpy as np
import tensorflow as tf
import scipy.io as sio
from inspect import isfunction
print ("Done.")

Done.


### Load

In [2]:
model_name = 'alphred' # alphred / panda
# glob returns paths in arbitrary (filesystem-dependent) order; sort them so the
# row order of the concatenated dataset is the same on every run.
math_paths = sorted(glob.glob('../data/%s-dataset/*.mat'%(model_name)))
print ("We have [%d] mat files."%(len(math_paths)))
if len(math_paths) == 0:
    # Fail here with a clear message instead of a NameError further down
    raise FileNotFoundError("No mat files found in [../data/%s-dataset/]."%(model_name))

# Collect the per-file arrays first and concatenate once at the end
# (concatenating inside the loop re-copies everything loaded so far on each file).
in_q_list,out_t_list,out_w_list = [],[],[]
for mat_path in math_paths:
    l = sio.loadmat(mat_path)
    in_q_list.append(l['in_q'])
    out_t_list.append(l['out_t'])
    out_w_list.append(l['out_w'])
in_q = np.concatenate(in_q_list,axis=0)
out_t = np.concatenate(out_t_list,axis=0)
out_w = np.concatenate(out_w_list,axis=0)

# Inputs are 'in_q'; targets are 'out_t' and 'out_w' stacked column-wise
x_data = in_q
y_data = np.concatenate((out_t,out_w),axis=1)
n,xdim,ydim = x_data.shape[0],x_data.shape[1],y_data.shape[1]
print ("n:[%d], xdim:[%d], ydim:[%d]."%(n,xdim,ydim))

We have [12] mat files.
n:[120000], xdim:[150], ydim:[21].


### Define Class

In [3]:
class MultiLayerPerceptionClass(object):
    """
    MLP for Regression

    Builds, inside the variable scope `name`, a fully-connected network
    ph_x -> hidden layers -> y_pred, a weighted L1 + L2 regression loss and
    an Adam training op.

    Args:
        name: variable-scope name; also used as the output folder name in save_to_mat().
        xdim: input dimension.
        ydim: output dimension.
        hdims: sequence of hidden-layer widths (one dense layer per entry).
        actv: activation used by every hidden layer (the output layer is linear).
        adam_beta1, adam_beta2, adam_epsilon: forwarded to tf.train.AdamOptimizer.
            NOTE(review): the default adam_epsilon of 1e-0 is much larger than the
            optimizer's own default (1e-8) - confirm this is intentional.
    """
    def __init__(self,name='mlp',xdim=2,ydim=1,hdims=[64]*2,actv=tf.nn.relu,
                 adam_beta1=0.5,adam_beta2=0.9,adam_epsilon=1e-0,):
        self.name = name
        self.xdim = xdim
        self.ydim = ydim
        # Copy so the instance never aliases the shared mutable default (or the caller's list)
        self.hdims = list(hdims)
        self.actv = actv
        
        self.adam_beta1 = adam_beta1
        self.adam_beta2 = adam_beta2
        self.adam_epsilon = adam_epsilon
        
        with tf.variable_scope(self.name,reuse=False):
            self.build_graph()
    
    def build_graph(self):
        """
        Build placeholders, network, loss and optimizer.
        Called by __init__ from inside the model's variable scope.
        """
        ki = tf.contrib.layers.variance_scaling_initializer() # tf.contrib.layers.xavier_initializer()
        bi = tf.constant_initializer(value=0)
        self.ph_x = tf.placeholder(shape=[None,self.xdim],dtype=tf.float32,name='ph_x') # [n x x_dim]
        self.ph_y_trgt = tf.placeholder(shape=[None,self.ydim],dtype=tf.float32,name='ph_y_trgt') # [n x y_dim]
        
        # Network F: ph_x => y_pred
        with tf.variable_scope('F',reuse=False):
            net = self.ph_x
            for h_idx,hdim in enumerate(self.hdims):
                net = tf.layers.dense(net,hdim,activation=self.actv,
                                      kernel_initializer=ki,bias_initializer=bi,
                                      name='hid_lin_%d'%(h_idx))
            self.y_pred = tf.layers.dense(net,self.ydim,activation=None,
                                          kernel_initializer=ki,bias_initializer=bi,
                                          name='y_pred') # [n x ydim]
        # Loss: per-sample weighted sum of L1 and L2 errors, averaged over the batch
        self.l1_coef = tf.placeholder(shape=[],dtype=tf.float32,name='l1_coef') # [1]
        self.l2_coef = tf.placeholder(shape=[],dtype=tf.float32,name='l2_coef') # [1]
        self.l1_losses = tf.reduce_sum(tf.abs(self.ph_y_trgt-self.y_pred),axis=1) # [N]
        self.l2_losses = tf.reduce_sum((self.ph_y_trgt-self.y_pred)**2,axis=1) # [N]
        self.reg_losses = self.l1_coef*self.l1_losses + self.l2_coef*self.l2_losses
        self.reg_loss = tf.reduce_mean(self.reg_losses) # [1]
        
        # Optimizer
        # Keep only the variables that live under this model's own scope. A prefix test
        # on the full scope name is used (not a substring test) so that a different model
        # whose scope name merely contains this one's is not picked up by mistake.
        scope_name = tf.get_variable_scope().name or self.name
        scope_prefix = '%s/'%(scope_name)
        self.t_vars = [var for var in tf.trainable_variables() if var.name.startswith(scope_prefix)]
        self.lr = tf.placeholder(shape=[],dtype=tf.float32,name='lr') # [1]
        self.optm = tf.train.AdamOptimizer(
                self.lr,
                beta1=self.adam_beta1,beta2=self.adam_beta2,epsilon=self.adam_epsilon).minimize(
                self.reg_loss,
                var_list=self.t_vars,name='optm') # updates the variables of this model only
        
    def update(self,sess,x_batch,y_batch,l1_coef=1.0,l2_coef=1.0,lr=1e-3):
        """
        Run one optimizer step on a mini-batch.

        Args:
            sess: session in which the graph variables have been initialized.
            x_batch: inputs, fed to ph_x ([batch x xdim]).
            y_batch: targets, fed to ph_y_trgt ([batch x ydim]).
            l1_coef, l2_coef: weights of the L1 and L2 loss terms.
            lr: learning rate used for this step.
        Returns:
            The scalar loss value (reg_loss) obtained from the same sess.run call.
        """
        feeds = {self.ph_x:x_batch,self.ph_y_trgt:y_batch,self.l1_coef:l1_coef,self.l2_coef:l2_coef,self.lr:lr}
        _,loss_val = sess.run([self.optm,self.reg_loss],feed_dict=feeds)
        return loss_val

    # Save network information to matfile 
    def save_to_mat(self,sess,epoch=0,suffix='',SAVE_MAT=True,CHANGE_VAR_NAME=True,VERBOSE=True):
        """
        Save weights, class properties and a few validation samples to a mat file.

        Args:
            sess: session holding the current variable values.
            epoch: currently unused; kept for interface compatibility.
            suffix: appended to the file name ('nets/<name>/weights<suffix>.mat').
            SAVE_MAT: if False, everything is collected (and optionally printed) but not written.
            CHANGE_VAR_NAME: if True, variable names are flattened
                ('/' and ':' => '_', class name removed, trailing index dropped).
            VERBOSE: if True, print name and type of every item to be saved.
        """
        d = {}
        c_name = self.name
        # Only variables whose name contains one of these tags are exported
        weight_tags = ('kernel:','bias:','moving_mean:','moving_variance:','gamma:','beta:')
        for var in self.t_vars: # trainable variables
            w_name = var.name
            if not any(tag in w_name for tag in weight_tags):
                continue
            v_name = w_name
            if CHANGE_VAR_NAME:
                v_name = v_name.replace('/','_') # replace '/' => '_'
                v_name = v_name.replace(':','_') # replace ':' => '_'
                v_name = v_name.replace('%s_'%(c_name),'') # remove class name
                # remove characters after LAST '_' (hid_0_kernel_0 -> hid_0_kernel)
                v_name = v_name.rsplit('_',1)[0]
            d[v_name] = sess.run(var)
                
        # Class properties
        for prop in ['name','xdim','ydim','hdims','actv']:
            prop_val = getattr(self,prop)
            if isfunction(prop_val): # function name
                d[prop] = prop_val.__name__
            else: # others
                d[prop] = prop_val
                
        # Validation data: random inputs and the network's outputs for them,
        # stored alongside the weights
        n_vald = 10
        x_vald = np.random.randn(n_vald,self.xdim)
        y_vald = sess.run(self.y_pred,feed_dict={self.ph_x:x_vald})
        d['x_vald'] = x_vald
        d['y_vald'] = y_vald
            
        # Check names and types of things to save
        if VERBOSE:
            for k_idx,key in enumerate(d.keys()):
                item_type = type(d[key]).__name__
                print ("  [%02d] Name:[%s] Type:[%s]."%(k_idx,key,item_type))
        
        # Save to a mat file
        if SAVE_MAT:
            dir_path = 'nets/%s'%(self.name)
            mat_path = os.path.join(dir_path,'weights%s.mat'%(suffix))
            if not os.path.exists(dir_path):
                # exist_ok tolerates the folder being created between the check and this call
                os.makedirs(dir_path,exist_ok=True)
                print ("[%s] created."%(dir_path))
            sio.savemat(mat_path,d) # save to a mat file
            print ("[%s] saved. Size is[%.3f]MB."%(mat_path,os.path.getsize(mat_path) / 1000000))
            
def gpu_sess():
    """
    Create a tf.Session whose config has gpu_options.allow_growth enabled.
    """
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    return tf.Session(config=session_config)

print ("Done.")

Done.


### Hyperparameters

In [4]:
# Network architecture: three hidden layers of 256 units with ReLU activation
hdims = [256,256,256]
actv = tf.nn.relu
# Weights of the L1 and L2 terms of the regression loss
l1_coef = 1.0
l2_coef = 5.0
# Base learning rate
lr = 1e-3
print ("Done.")

Done.


### Training loop

In [None]:
# Fix the random seeds so that weight initialization, mini-batch sampling and the
# validation samples drawn in save_to_mat() are repeatable when the cell is re-run
# (GPU kernels may still introduce small run-to-run differences).
seed = 0
tf.reset_default_graph()
tf.set_random_seed(seed) # graph-level seed: set after the reset and before the model is built
np.random.seed(seed)
M = MultiLayerPerceptionClass(
    name='mlp_%s'%(model_name),xdim=xdim,ydim=ydim,hdims=hdims,actv=actv)
# The session is intentionally left open so it can still be used after this cell
sess = gpu_sess()
sess.run(tf.global_variables_initializer())
print ("Done.")

n_epoch,batch_size = 10,128
# Number of mini-batch updates that corresponds to n_epoch passes over the data
max_iter,print_every = (n*n_epoch)//batch_size,500
for it in range(max_iter): 
    zero_to_one = it/max_iter
    one_to_zero = 1.0-zero_to_one # for lr schedule (linear decay from lr towards 0)
    # Draw a random mini-batch without replacement
    r_idx = np.random.permutation(n)[:batch_size]
    x_batch,y_batch = x_data[r_idx,:],y_data[r_idx,:]
    
    # Update
    loss_val = M.update(
        sess=sess,x_batch=x_batch,y_batch=y_batch,l1_coef=l1_coef,l2_coef=l2_coef,lr=lr*one_to_zero)
    
    # Print results every some iterations 
    if ((it % print_every) == 0) or ((it+1) == max_iter):         
        print ("[%d/%d][%.2f] loss_val:[%.3f]."%(it,max_iter,zero_to_one,loss_val))
        
# Save
M.save_to_mat(sess)
print ("Done.")

Done.
[0/9375][0.00] loss_val:[187.710].
[500/9375][0.05] loss_val:[4.164].
[1000/9375][0.11] loss_val:[3.348].
[1500/9375][0.16] loss_val:[3.213].
[2000/9375][0.21] loss_val:[2.805].
