# CNN for Classification

In [1]:
import nbloader,os,warnings
warnings.filterwarnings("ignore") 
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
import tensorflow as tf
import tensorflow.contrib.slim as slim
from sklearn.utils import shuffle
from util import gpusession,create_gradient_clipping,load_mnist_with_noise,print_n_txt,mixup
%matplotlib inline  
%config InlineBackend.figure_format = 'retina'
if __name__ == "__main__":
    print ("TensorFlow version is [%s]."%(tf.__version__))

TensorFlow version is [1.4.1].


### Define CNN Class

In [2]:
class cnn_cls_class(object):
    def __init__(self,_name='basic_cnn',_xdim=[28,28,1],_ydim=10,_hdims=[64,64],_filterSizes=[3,3],_max_pools=[2,2]
                 ,_feat_dim=128,_actv=tf.nn.relu,_bn=slim.batch_norm
                 ,_l2_reg_coef=1e-5
                 ,_momentum = 0.5
                 ,_USE_INPUT_BN=False,_USE_RESNET=False,_USE_GAP=False,_USE_SGD=False
                 ,_USE_MIXUP=False
                 ,_GPU_ID=0,_VERBOSE=True):
        self.name = _name 
        self.xdim = _xdim
        self.ydim = _ydim
        self.hdims = _hdims
        self.filterSizes = _filterSizes
        self.max_pools = _max_pools
        self.feat_dim = _feat_dim
        self.actv = _actv
        self.bn = _bn
        self.l2_reg_coef = _l2_reg_coef
        self.momentum = _momentum
        self.USE_INPUT_BN = _USE_INPUT_BN
        self.USE_RESNET = _USE_RESNET
        self.USE_GAP = _USE_GAP
        self.USE_SGD = _USE_SGD
        self.USE_MIXUP = _USE_MIXUP
        self.GPU_ID = (int)(_GPU_ID)
        self.VERBOSE = _VERBOSE
        with tf.device('/device:GPU:%d'%(self.GPU_ID)):
            # Build model
            self.build_model()
            # Build graph
            self.build_graph()
            # Check parameters
            self.check_params()
        
    def build_model(self):
        # Set placeholders
        _xdim = self.xdim[0]*self.xdim[1]*self.xdim[2] # Total dimension
        self.x = tf.placeholder(dtype=tf.float32,shape=[None,_xdim]) # Input [N x xdim]
        self.t = tf.placeholder(dtype=tf.float32,shape=[None,self.ydim]) # Output [N x D]
        self.kp = tf.placeholder(dtype=tf.float32,shape=[]) # []
        self.is_training = tf.placeholder(dtype=tf.bool,shape=[]) # []
        self.lr = tf.placeholder(dtype=tf.float32,shape=[]) # []
        self.bn_init     = {'beta': tf.constant_initializer(0.),
                           'gamma': tf.random_normal_initializer(1., 0.01)}
        batch_norm_params = {'is_training':self.is_training,'decay':0.9,'updates_collections': None}
        
        with tf.variable_scope(self.name,reuse=False) as scope:
            
            # List of features
            self.layers = []
            self.layers.append(self.x)

            # Reshape input 
            _net = tf.reshape(self.x,[-1]+self.xdim) 
            self.layers.append(_net) 
            
            # Input normalization 
            if self.USE_INPUT_BN:
                _net = slim.batch_norm(_net,param_initializers=self.bn_init,is_training=self.is_training,updates_collections=None)
                
            # Convolution layers
            for hidx,hdim in enumerate(self.hdims):
                fs = self.filterSizes[hidx]
                if self.USE_RESNET: # Use residual connection 
                    cChannelSize = _net.get_shape()[3] # Current channel size
                    if cChannelSize == hdim:
                        _identity = _net
                    else: # Expand dimension if required 
                        _identity = slim.conv2d(_net,hdim,[1,1],padding='SAME',activation_fn=None
                                              , weights_initializer = tf.truncated_normal_initializer(stddev=0.01)
                                              , normalizer_fn       = self.bn
                                              , normalizer_params   = batch_norm_params
                                              , scope='identity_%d'%(hidx))
                    # First conv
                    _net = slim.conv2d(_net,hdim,[fs,fs],padding='SAME'
                                     , activation_fn       = None
                                     , weights_initializer = tf.truncated_normal_initializer(stddev=0.01)
                                     , normalizer_fn       = self.bn
                                     , normalizer_params   = batch_norm_params
                                     , scope='res_a_%d'%(hidx))
                    # Relu
                    _net = self.actv(_net)
                    self.layers.append(_net) # Append to list
                    # Second conv
                    _net = slim.conv2d(_net,hdim,[fs,fs],padding='SAME'
                                     , activation_fn       = None
                                     , weights_initializer = tf.truncated_normal_initializer(stddev=0.01)
                                     , normalizer_fn       = self.bn
                                     , normalizer_params   = batch_norm_params
                                     , scope='res_b_%d'%(hidx))
                    # Skip connection
                    _net = _net + _identity
                    # Relu
                    _net = self.actv(_net)
                    self.layers.append(_net) # Append to list
                else:
                    _net = slim.conv2d(_net,hdim,[fs,fs],padding='SAME'
                                     , activation_fn       = self.actv
                                     , weights_initializer = tf.truncated_normal_initializer(stddev=0.01)
                                     , normalizer_fn       = self.bn
                                     , normalizer_params   = batch_norm_params
                                     , scope='conv_%d'%(hidx))
                    self.layers.append(_net) # Append to list
                # Max pooling (if required)
                max_pool = self.max_pools[hidx]
                if max_pool > 1:
                    _net = slim.max_pool2d(_net,[max_pool,max_pool],scope='pool_%d'%(hidx))
                    self.layers.append(_net) # Append to list
                
            # Global average pooling 
            if self.USE_GAP: 
                _net = tf.reduce_mean(_net,[1,2])
                self.layers.append(_net) # Append to list
                # Feature
                self.feat = _net # [N x Q]
            else:
                # Flatten and output
                _net = slim.flatten(_net, scope='flatten')
                self.layers.append(_net) # Append to list
                # Dense
                _net = slim.fully_connected(_net,self.feat_dim,scope='fc')
                self.layers.append(_net) # Append to list
                # Feature
                self.feat = _net # [N x Q]
            
            # Dropout at the last layer 
            _net = slim.dropout(_net, keep_prob=self.kp,is_training=self.is_training,scope='dropout')  
            _out = slim.fully_connected(_net,self.ydim,activation_fn=None,normalizer_fn=None, scope='out')# [N x D]
            self.layers.append(_out) # Append to list
            self.out = _out
            
    # Build graph
    def build_graph(self):
        # Cross-entropy loss
        self._loss_ce = tf.nn.softmax_cross_entropy_with_logits(labels=self.t,logits=self.out) # [N]
        self.loss_ce = tf.reduce_mean(self._loss_ce) # []
        # Weight decay regularizer
        _g_vars = tf.global_variables()
        _c_vars = [var for var in _g_vars if '%s/'%(self.name) in var.name]
        self.l2_reg = self.l2_reg_coef*tf.reduce_sum(tf.stack([tf.nn.l2_loss(v) for v in _c_vars])) # []
        # Total loss
        self.loss_total = self.loss_ce + self.l2_reg
        if self.USE_SGD:
            # self.optm = tf.train.GradientDescentOptimizer(learning_rate=self.lr).minimize(self.loss_total)
            self.optm = tf.train.MomentumOptimizer(learning_rate=self.lr,momentum=self.momentum).minimize(self.loss_total)
        else:
            self.optm = tf.train.AdamOptimizer(learning_rate=self.lr
                                               ,beta1=0.9,beta2=0.999,epsilon=1e-6).minimize(self.loss_total)
        # Accuracy
        _corr = tf.equal(tf.argmax(self.out, 1), tf.argmax(self.t, 1))    
        self.accr = tf.reduce_mean(tf.cast(_corr,tf.float32)) 
        
    # Check parameters
    def check_params(self):
        _g_vars = tf.global_variables()
        self.g_vars = [var for var in _g_vars if '%s/'%(self.name) in var.name]
        if self.VERBOSE:
            print ("==== Global Variables ====")
        for i in range(len(self.g_vars)):
            w_name  = self.g_vars[i].name 
            w_shape = self.g_vars[i].get_shape().as_list()
            if self.VERBOSE:
                print (" [%02d] Name:[%s] Shape:[%s]" % (i,w_name,w_shape))
        # Print layers
        if self.VERBOSE:
            print ("====== Layers ======")
            nLayers = len(self.layers)
            for i in range(nLayers):
                print ("[%02d/%d] %s %s"%(i,nLayers,self.layers[i].name,self.layers[i].shape))
    
    # Saver
    def save(self,_sess,_savename=None):
        if _savename==None:
            _savename='../net/net_%s.npz'%(self.name)
        # Get global variables 
        self.g_wnames,self.g_wvals,self.g_wshapes = [],[],[]
        for i in range(len(self.g_vars)):
            curr_wname = self.g_vars[i].name
            curr_wvar  = [v for v in tf.global_variables() if v.name==curr_wname][0]
            curr_wval  = _sess.run(curr_wvar)
            
            curr_wval_sqz = curr_wval
            # curr_wval_sqz  = curr_wval.squeeze() # ???
            curr_wval_sqz = np.asanyarray(curr_wval_sqz,order=(1,-1))
            
            self.g_wnames.append(curr_wname)
            self.g_wvals.append(curr_wval_sqz)
            self.g_wshapes.append(curr_wval.shape)
        # Save 
        np.savez(_savename,g_wnames=self.g_wnames,g_wvals=self.g_wvals,g_wshapes=self.g_wshapes)
        if self.VERBOSE: 
            print ("[%s] Saved. Size is [%.4f]MB" % 
                   (_savename,os.path.getsize(_savename)/1000./1000.))
    
    # Restore 
    def restore(self,_sess,_loadname=None):
        if _loadname==None:
            _loadname='../net/net_%s.npz'%(self.name)
        l = np.load(_loadname)
        g_wnames = l['g_wnames']
        g_wvals  = l['g_wvals']
        g_wshapes = l['g_wshapes']
        for widx,wname in enumerate(g_wnames):
            curr_wvar  = [v for v in tf.global_variables() if v.name==wname][0]
            _sess.run(tf.assign(curr_wvar,g_wvals[widx].reshape(g_wshapes[widx])))
        if self.VERBOSE:
            print ("Weight restored from [%s] Size is [%.4f]MB" % 
                   (_loadname,os.path.getsize(_loadname)/1000./1000.))
    
    # Train 
    def train(self,_sess,_trainimg,_trainlabel,_testimg,_testlabel,_valimg,_vallabel
              ,_maxEpoch=10,_batchSize=256,_lr=1e-3,_kp=0.9
              ,_LR_SCHEDULE=False,_PRINT_EVERY=10,_SAVE_BEST=True,_DO_AUGMENTATION=False,_VERBOSE_TRAIN=True):
        tf.set_random_seed(0)
        nTrain,nVal,nTest = _trainimg.shape[0],_valimg.shape[0],_testimg.shape[0]
        txtName = ('../res/res_%s.txt'%(self.name))
        f = open(txtName,'w') # Open txt file
        print_n_txt(_f=f,_chars='Text name: '+txtName)
        print_period=max(1,_maxEpoch//_PRINT_EVERY)
        maxIter,maxValAccr,maxTestAccr = max(nTrain//_batchSize,1),0.0,0.0
        for epoch in range(_maxEpoch+1): # For every epoch 
            _trainimg,_trainlabel = shuffle(_trainimg,_trainlabel) 
            for iter in range(maxIter): # For every iteration in one epoch
                start,end = iter*_batchSize,(iter+1)*_batchSize
                # Learning rate scheduling
                if _LR_SCHEDULE:
                    if epoch < 0.5*_maxEpoch:
                        _lr_use = _lr
                    elif epoch < 0.75*_maxEpoch:
                        _lr_use = _lr/10.0
                    else:
                        _lr_use = _lr/100.0
                else:
                    _lr_use = _lr
                if _DO_AUGMENTATION:
                    trainImgBatch = augment_img(_trainimg[start:end,:],self.xdim) 
                else:
                    trainImgBatch = _trainimg[start:end,:]
                if self.USE_MIXUP:
                    xBatch = trainImgBatch
                    tBatch = _trainlabel[start:end,:]
                    xBatch,tBatch = mixup(xBatch,tBatch,32)
                else:
                    xBatch = trainImgBatch
                    tBatch = _trainlabel[start:end,:]
                feeds = {self.x:xBatch,self.t:tBatch
                         ,self.kp:_kp,self.lr:_lr_use,self.is_training:True}
                _sess.run(self.optm,feed_dict=feeds)
            # Print training losses, training accuracy, validation accuracy, and test accuracy
            if (epoch%print_period)==0 or (epoch==(_maxEpoch)):
                batchSize4print = 512 
                # Compute train loss and accuracy
                maxIter4print = max(nTrain//batchSize4print,1)
                trainLoss,trainAccr,nTemp = 0,0,0
                for iter in range(maxIter4print):
                    start,end = iter*batchSize4print,(iter+1)*batchSize4print
                    feeds_train = {self.x:_trainimg[start:end,:],self.t:_trainlabel[start:end,:]
                             ,self.kp:1.0,self.is_training:False}
                    _trainLoss,_trainAccr = _sess.run([self.loss_total,self.accr],feed_dict=feeds_train) 
                    _nTemp = end-start; nTemp+=_nTemp
                    trainLoss+=(_nTemp*_trainLoss); trainAccr+=(_nTemp*_trainAccr)
                trainLoss/=nTemp;trainAccr/=nTemp
                # Compute validation loss and accuracy
                maxIter4print = max(nVal//batchSize4print,1)
                valLoss,valAccr,nTemp = 0,0,0
                for iter in range(maxIter4print):
                    start,end = iter*batchSize4print,(iter+1)*batchSize4print
                    feeds_val = {self.x:_valimg[start:end,:],self.t:_vallabel[start:end,:]
                             ,self.kp:1.0,self.is_training:False}
                    _valLoss,_valAccr = _sess.run([self.loss_total,self.accr],feed_dict=feeds_val) 
                    _nTemp = end-start; nTemp+=_nTemp
                    valLoss+=(_nTemp*_valLoss); valAccr+=(_nTemp*_valAccr)
                valLoss/=nTemp;valAccr/=nTemp
                # Compute test loss and accuracy
                maxIter4print = max(nTest//batchSize4print,1)
                testLoss,testAccr,nTemp = 0,0,0
                for iter in range(maxIter4print):
                    start,end = iter*batchSize4print,(iter+1)*batchSize4print
                    feeds_test = {self.x:_testimg[start:end,:],self.t:_testlabel[start:end,:]
                             ,self.kp:1.0,self.is_training:False}
                    _testLoss,_testAccr = _sess.run([self.loss_total,self.accr],feed_dict=feeds_test) 
                    _nTemp = end-start; nTemp+=_nTemp
                    testLoss+=(_nTemp*_testLoss); testAccr+=(_nTemp*_testAccr)
                testLoss/=nTemp;testAccr/=nTemp
                # Compute max val accr
                if valAccr > maxValAccr:
                    maxValAccr = valAccr
                    maxTestAccr = testAccr
                    if _SAVE_BEST: self.save(_sess) 
                strTemp = (("[%02d/%d] [Loss] train:%.3f val:%.3f test:%.3f"
                            +" [Accr] train:%.1f%% val:%.1f%% test:%.1f%% maxVal:%.1f%% maxTest:%.1f%%")
                       %(epoch,_maxEpoch,trainLoss,valLoss,testLoss
                         ,trainAccr*100,valAccr*100,testAccr*100,maxValAccr*100,maxTestAccr*100))
                print_n_txt(_f=f,_chars=strTemp,_DO_PRINT=_VERBOSE_TRAIN)
        # Done 
        print ("Training finished.")
    
    # Test
    def test(self,_sess,_trainimg,_trainlabel,_testimg,_testlabel,_valimg,_vallabel):
        nTrain,nVal,nTest = _trainimg.shape[0],_valimg.shape[0],_testimg.shape[0]
        # Check accuracies (train, val, and test)
        batchSize4print = 512 
        # Compute train loss and accuracy
        maxIter4print = max(nTrain//batchSize4print,1)
        trainLoss,trainAccr,nTemp = 0,0,0
        for iter in range(maxIter4print):
            start,end = iter*batchSize4print,(iter+1)*batchSize4print
            feeds_train = {self.x:_trainimg[start:end,:],self.t:_trainlabel[start:end,:]
                     ,self.kp:1.0,self.is_training:False}
            _trainLoss,_trainAccr = _sess.run([self.loss_total,self.accr],feed_dict=feeds_train) 
            _nTemp = end-start; nTemp+=_nTemp
            trainLoss+=(_nTemp*_trainLoss); trainAccr+=(_nTemp*_trainAccr)
        trainLoss/=nTemp;trainAccr/=nTemp
        # Compute validation loss and accuracy
        maxIter4print = max(nVal//batchSize4print,1)
        valLoss,valAccr,nTemp = 0,0,0
        for iter in range(maxIter4print):
            start,end = iter*batchSize4print,(iter+1)*batchSize4print
            feeds_val = {self.x:_valimg[start:end,:],self.t:_vallabel[start:end,:]
                     ,self.kp:1.0,self.is_training:False}
            _valLoss,_valAccr = _sess.run([self.loss_total,self.accr],feed_dict=feeds_val) 
            _nTemp = end-start; nTemp+=_nTemp
            valLoss+=(_nTemp*_valLoss); valAccr+=(_nTemp*_valAccr)
        valLoss/=nTemp;valAccr/=nTemp
        # Compute test loss and accuracy
        maxIter4print = max(nTest//batchSize4print,1)
        testLoss,testAccr,nTemp = 0,0,0
        for iter in range(maxIter4print):
            start,end = iter*batchSize4print,(iter+1)*batchSize4print
            feeds_test = {self.x:_testimg[start:end,:],self.t:_testlabel[start:end,:]
                     ,self.kp:1.0,self.is_training:False}
            _testLoss,_testAccr = _sess.run([self.loss_total,self.accr],feed_dict=feeds_test) 
            _nTemp = end-start; nTemp+=_nTemp
            testLoss+=(_nTemp*_testLoss); testAccr+=(_nTemp*_testAccr)
        testLoss/=nTemp;testAccr/=nTemp
        strTemp = (("[%s] [Loss] train:%.3f val:%.3f test:%.3f"
                    +" [Accr] train:%.3f%% val:%.3f%% test:%.3f%%")
               %(self.name,trainLoss,valLoss,testLoss,trainAccr*100,valAccr*100,testAccr*100))
        print(strTemp)
    
if __name__ == "__main__":
    print ("cnn_cls_class defined.")

cnn_cls_class defined.


### Train CNN on MNIST

In [3]:
def get_mnist_config():
    trainimg,trainlabel,testimg,testlabel,valimg,vallabel \
        = load_mnist_with_noise(_errType='rp',_outlierRatio=0.9,_seed=0)
    xdim,ydim,hdims,filterSizes,max_pools,feat_dim = [28,28,1],10,[64,64],[3,3],[2,2],256
    actv,bn,VERBOSE = tf.nn.relu,slim.batch_norm,True 
    USE_INPUT_BN,USE_RESNET,USE_GAP,USE_MIXUP = False,True,False,False
    return trainimg,trainlabel,testimg,testlabel,valimg,vallabel \
        ,xdim,ydim,hdims,filterSizes,max_pools,feat_dim \
        ,actv,bn,VERBOSE \
        ,USE_INPUT_BN,USE_RESNET,USE_GAP,USE_MIXUP

In [4]:
if __name__ == "__main__": 
    trainimg,trainlabel,testimg,testlabel,valimg,vallabel \
        ,xdim,ydim,hdims,filterSizes,max_pools,feat_dim \
        ,actv,bn,VERBOSE \
        ,USE_INPUT_BN,USE_RESNET,USE_GAP,USE_MIXUP = get_mnist_config()
    tf.reset_default_graph()
    tf.set_random_seed(0) 
    CNN = cnn_cls_class(_name='basic_cnn_mnist',_xdim=xdim,_ydim=ydim,_hdims=hdims,_filterSizes=filterSizes,_max_pools=max_pools
                        ,_feat_dim=feat_dim,_actv=actv,_bn=bn,_l2_reg_coef=1e-5 
                        ,_USE_INPUT_BN=USE_INPUT_BN,_USE_RESNET=USE_RESNET,_USE_GAP=USE_GAP
                        ,_USE_MIXUP=USE_MIXUP,_GPU_ID=0,_VERBOSE=VERBOSE)
    sess = gpusession();sess.run(tf.global_variables_initializer()) 
    CNN.train(_sess=sess,_trainimg=trainimg,_trainlabel=trainlabel
              ,_testimg=testimg,_testlabel=testlabel,_valimg=valimg,_vallabel=vallabel
              ,_maxEpoch=50,_batchSize=256,_lr=1e-5,_LR_SCHEDULE=True,_PRINT_EVERY=10,_SAVE_BEST=True)

Extracting ../data/train-images-idx3-ubyte.gz
Extracting ../data/train-labels-idx1-ubyte.gz
Extracting ../data/t10k-images-idx3-ubyte.gz
Extracting ../data/t10k-labels-idx1-ubyte.gz
==== Global Variables ====
 [00] Name:[basic_cnn_mnist/identity_0/weights:0] Shape:[[1, 1, 1, 64]]
 [01] Name:[basic_cnn_mnist/identity_0/BatchNorm/beta:0] Shape:[[64]]
 [02] Name:[basic_cnn_mnist/identity_0/BatchNorm/moving_mean:0] Shape:[[64]]
 [03] Name:[basic_cnn_mnist/identity_0/BatchNorm/moving_variance:0] Shape:[[64]]
 [04] Name:[basic_cnn_mnist/res_a_0/weights:0] Shape:[[3, 3, 1, 64]]
 [05] Name:[basic_cnn_mnist/res_a_0/BatchNorm/beta:0] Shape:[[64]]
 [06] Name:[basic_cnn_mnist/res_a_0/BatchNorm/moving_mean:0] Shape:[[64]]
 [07] Name:[basic_cnn_mnist/res_a_0/BatchNorm/moving_variance:0] Shape:[[64]]
 [08] Name:[basic_cnn_mnist/res_b_0/weights:0] Shape:[[3, 3, 64, 64]]
 [09] Name:[basic_cnn_mnist/res_b_0/BatchNorm/beta:0] Shape:[[64]]
 [10] Name:[basic_cnn_mnist/res_b_0/BatchNorm/moving_mean:0] Shape

### Restore and Re-run

In [6]:
if __name__ == "__main__": 
    trainimg,trainlabel,testimg,testlabel,valimg,vallabel \
        ,xdim,ydim,hdims,filterSizes,max_pools,feat_dim \
        ,actv,bn,VERBOSE \
        ,USE_INPUT_BN,USE_RESNET,USE_GAP,USE_MIXUP = get_mnist_config()
    tf.reset_default_graph()
    tf.set_random_seed(0) 
    CNN2 = cnn_cls_class(_name='basic_cnn_mnist',_xdim=xdim,_ydim=ydim,_hdims=hdims,_filterSizes=filterSizes,_max_pools=max_pools
                        ,_feat_dim=feat_dim,_actv=actv,_bn=bn,_l2_reg_coef=1e-5 
                        ,_USE_INPUT_BN=USE_INPUT_BN,_USE_RESNET=USE_RESNET,_USE_GAP=USE_GAP
                        ,_USE_MIXUP=USE_MIXUP,_GPU_ID=0,_VERBOSE=False)
    sess = gpusession();sess.run(tf.global_variables_initializer()) 
    CNN2.restore(sess) # Restore weights
    CNN2.test(sess,_trainimg=trainimg,_trainlabel=trainlabel
             ,_testimg=testimg,_testlabel=testlabel,_valimg=valimg,_vallabel=vallabel)

Extracting ../data/train-images-idx3-ubyte.gz
Extracting ../data/train-labels-idx1-ubyte.gz
Extracting ../data/t10k-images-idx3-ubyte.gz
Extracting ../data/t10k-labels-idx1-ubyte.gz
[basic_cnn_mnist] [Loss] train:0.858 val:0.767 test:0.772 [Accr] train:53.861% val:65.299% test:65.214%
