# ConvBonsai Tree

### Importing Libraries

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from IPython.display import display, clear_output
from sklearn.model_selection import train_test_split as tts
import sys
import time
import os
import argparse
%matplotlib inline
import pandas as pd
import matplotlib.image as img

In [3]:
# Command-line style hyper-parameters for the notebook.
# Every option declares an explicit `type` so that a value supplied on the
# command line is converted to a number instead of being kept as a string;
# the defaults are unchanged.
parser = argparse.ArgumentParser(description='ConvBonsai training hyper-parameters.')
parser.add_argument('-lr', '--lr', type=float, default=0.01,
                    help='learning rate passed to the optimizer')
parser.add_argument('-tree_depth', '--tdepth', type=int, default=2,
                    help='depth of the Bonsai tree attached to each conv leaf')
parser.add_argument('-conv_depth', '--cdepth', type=int, default=2,
                    help='depth of the convolutional tree')
parser.add_argument('-sparsity', '--sty', type=float, default=0.995,
                    help='sparsity factor in [0, 1] used for hard thresholding')
parser.add_argument('-regularization', '--reg', type=float, default=0.000001,
                    help='regularization weight applied to the model parameters')

_StoreAction(option_strings=['-regularization', '--reg'], dest='reg', nargs=None, const=None, default=1e-06, type=None, choices=None, help=None, metavar=None)

In [4]:
# Parse an explicitly empty argument list so every option takes its default
# (the notebook kernel's own sys.argv is never consulted).
args = parser.parse_args(args=[])

# Dataset

We use the CIFAR-10 dataset here (loaded through `tf.keras.datasets`); you can substitute your own dataset.

In [4]:
# Alternative input (kept commented out for reference): a pre-processed Bonsai
# dataset stored as .npy files and split with a stratified train/test split.
# dirc = './Datasets/mnist_small/'
# Xtrain = np.load(dirc + 'Xtrain.npy')
# Ytrain = np.load(dirc + 'Ytrain.npy')
# Xtrain, Xtest, Ytrain, Ytest = tts(Xtrain, Ytrain, stratify = Ytrain, test_size = 0.25)

# Active input: CIFAR-10 from tf.keras, unpacked into (images, labels) pairs
# for the train and test splits.
(Xtrain, Ytrain), (Xtest, Ytest) = tf.keras.datasets.cifar10.load_data()

In [5]:
"""
one hot encoder for converting integer values to one hot vector
"""
from sklearn.preprocessing import LabelEncoder as LE 
from sklearn.preprocessing import OneHotEncoder as OHE

# mo1 maps the raw labels to consecutive integers, mo2 turns those integers
# into one-hot rows. Both are fitted on the training labels only and then
# re-used (transform, not fit_transform) for the test labels.
mo1 = LE()
mo2 = OHE()

# `.toarray()` returns a plain ndarray; `.todense()` would return an
# np.matrix, whose use NumPy discourages.
# NOTE: this cell overwrites Ytrain/Ytest, so it must be run exactly once per
# dataset load.
Ytrain = mo2.fit_transform(mo1.fit_transform(Ytrain.ravel()).reshape(-1,1)).toarray()
Ytest = mo2.transform(mo1.transform(Ytest.ravel()).reshape(-1,1)).toarray()

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [6]:
# Xtrain is still a 4-D image tensor at this point: the leading axis counts the
# samples, the remaining three are multiplied together to give the length of
# one flattened sample.
N = Xtrain.shape[0]
W, H, C = Xtrain.shape[1:]
dDims = W*H*C

# The labels are one-hot encoded, so the column count is the number of classes.
nClasses = Ytrain.shape[1]

print('Training Size:',N,',Data Dims:', dDims,',No. Classes:', nClasses)

Training Size: 50000 ,Data Dims: 3072 ,No. Classes: 10


In [7]:
# Preprocessing files
def preprocess(x, mean=None, std=None, eps=1e-8):
    """
    Standardize a batch of images along the last axis and flatten each sample.

    Parameters
    ----------
    x : array of rank 4; statistics are taken over the first three axes, so
        there is one mean / std value per entry of the last axis.
    mean, std : optional statistics to use instead of the ones computed from
        `x` (e.g. the training-set statistics when transforming a test set).
        They must broadcast against `x`.
    eps : a std below this value is replaced by 1 so that a constant channel
        maps to zeros instead of NaN / inf.

    Returns
    -------
    2-D array of shape (x.shape[0], prod(x.shape[1:])).

    Raises
    ------
    ValueError if `x` is not 4-dimensional.
    """
    x = np.asarray(x)
    if x.ndim != 4:
        raise ValueError("preprocess expects a 4-D array, got %d dimension(s)" % x.ndim)

    if mean is None:
        mean = x.mean(axis=(0,1,2), keepdims=True)
    if std is None:
        std = x.std(axis=(0,1,2), keepdims=True)

    # Guard the division: leave (near-)constant channels unscaled.
    std = np.asarray(std)
    safe_std = np.where(std < eps, np.ones_like(std), std)

    z = (x - mean) / safe_std
    return z.reshape(x.shape[0], -1)

In [8]:
print("Preprocessing the dataset...")
# Each split is standardized and flattened by its own call to preprocess();
# the 4-D image arrays are replaced by the resulting 2-D arrays.
Xtrain, Xtest = preprocess(Xtrain), preprocess(Xtest)

Preprocessing the dataset...


Tensorboard Visualization writer

In [9]:
# TensorBoard summary writer created with 'convbonsai' as its target.
# The only later use of a writer in this notebook (add_graph in the training
# cell) is commented out.
writer = tf.summary.FileWriter('convbonsai')

## Model Class Definition

In [10]:
class ConvBonsai():
    """
    Binary tree of convolutional nodes whose leaf nodes each feed a Bonsai tree.

    Nodes of both trees are stored in heap order: node ``k`` (k >= 1) has
    parent ``(k - 1) // 2``.  Every conv node applies a VALID convolution to
    its parent's output; a learned linear score on the parent's flattened
    output decides (softly, through tanh) how much probability mass flows to
    each child.  The flattened output of every conv leaf, with a constant 1
    appended, is the input of that leaf's Bonsai tree.

    The graph is built lazily by calling the instance (``tree(X, sigmaI)``).
    """

    def __init__(self, nClasses, dDims, pDims, tDepth, sigma, kernelsshp, strides, cDepth = 2, W=None, T=None, V=None, Z=None, ch = None):
        '''
        dDims : data Dimensions (length of one flattened, square image)
        pDims : projected Dimensions; accepted for compatibility but replaced
                by the flattened size of a conv leaf output plus one
        nClasses : num Classes (stored as 1 in the binary case)
        tDepth : depth of the Bonsai tree attached to each conv leaf (>= 0)
        sigma : scale inside the tanh non-linearity of the Bonsai score
        kernelsshp : one conv kernel shape [kh, kw, inCh, outCh] per conv-tree
                     level (at least cDepth + 1 entries)
        strides : one conv2d stride list per conv-tree level
        cDepth : depth of the convolutional tree (>= 1)
        ch : number of image channels, 3 when None
        W, T, V, Z : accepted for interface compatibility; they are not read,
                     the parameters are always freshly initialized

        Parameter shapes created here
        -----------------------------
        W [convLeaves, numClasses*totalNodes, projectionDimension]
        V [convLeaves, numClasses*totalNodes, projectionDimension]
        T [convLeaves, internalNodes, projectionDimension]
        Z [2, 2] (not used by the forward pass)

        internalNodes = 2**treeDepth - 1
        totalNodes = 2*internalNodes + 1
        convLeaves = number of leaves of the conv tree

        sigma - tanh non-linearity
        sigmaI - Indicator function for node probabilities
        sigmaI - has to be set to infinity(1e9 for practicality)
        while doing testing/inference
        numClasses will be reset to 1 in binary case
        '''

        # Initialization of parameter variables
        self.dDims = dDims
        self.pDims = pDims

        # If number of classes is two we dont need to calculate other class probability
        if nClasses == 2:
            self.nClasses = 1
        else:
            self.nClasses = nClasses

        # int() so that depths read from an untyped CLI option (strings) work
        self.tDepth = int(tDepth)
        self.sigma = sigma
        self.iNodes = 2**self.tDepth - 1
        self.tNodes = 2*self.iNodes + 1

        self.cDepth = int(cDepth)
        # with cDepth == 0 the graph builder never reaches a leaf and would
        # return a None score, so fail early with a readable message
        assert self.cDepth >= 1, "cDepth should be >= 1"
        self.ciNodes = 2**self.cDepth - 1
        self.ctNodes = 2*self.ciNodes + 1

        nLevels = self.cDepth + 1
        errLevels = "kernelsshp and strides need one entry per conv-tree level (cDepth + 1)"
        assert len(kernelsshp) >= nLevels and len(strides) >= nLevels, errLevels

        self.kernelsT = []
        self.strides = []

        if(ch is None):
            ch = 3

        self.channels = ch
        # images are assumed square: side = sqrt(dDims / channels)
        var = int(np.sqrt(self.dDims/self.channels))
        self.d1 = var
        self.d2 = var

        assert self.d1*self.d2*ch == self.dDims, " Dimension mismatch, doesn't seem like it's a image or set channel(ch) = 1"

        # routing weights / biases of the conv nodes, filled while the graph is built
        self.wts = []
        self.wts1 = []
        self.wts2 = []
        self.bs = []

        Codims1 = self.d1
        Codims2 = self.d2

        with tf.name_scope("Params"):
            for i in range(self.ctNodes):
                # level of heap node i == floor(log2(i + 1)); bit_length keeps
                # this in exact integer arithmetic
                h = (i + 1).bit_length() - 1

                self.kernelsT.append(
                    tf.get_variable('kernelT'+str(i), kernelsshp[h],
                                 initializer=tf.truncated_normal_initializer(stddev=5e-2, dtype=tf.float32),
                                 dtype=tf.float32)
                )

                self.strides.append(strides[h])

            # spatial size after the VALID convolution of every level
            for lvl in range(nLevels):
                Codims1 = np.floor((Codims1 - kernelsshp[lvl][0])/(strides[lvl][1])) + 1
                Codims2 = np.floor((Codims2 - kernelsshp[lvl][1])/(strides[lvl][2])) + 1

            # flattened leaf output (height * width * output channels) plus the
            # constant-one column appended when the graph is built
            leafChannels = int(kernelsshp[self.cDepth][3])
            self.CoDims = int(Codims1*Codims2)*leafChannels + 1
            self.pDims = self.CoDims
            self.Z = tf.Variable(tf.random_normal([2,2]), name='Z', dtype=tf.float32)

            nLeaves = self.ctNodes - self.ciNodes
            self.W = tf.Variable(tf.random_normal([nLeaves, self.nClasses * self.tNodes, self.pDims]), name='W', dtype=tf.float32)
            self.V = tf.Variable(tf.random_normal([nLeaves, self.nClasses * self.tNodes, self.pDims]), name='V', dtype=tf.float32)
            self.T = tf.Variable(tf.random_normal([nLeaves, self.iNodes, self.pDims]), name='T', dtype=tf.float32)

        self.score = None
        self.X_ = None
        self.prediction = None
        self.convs = []
        self.cnodeProb = []
        self.nodeProb = []
        self.scores = []

    def __call__(self, X, sigmaI):
        '''
        Function to build the ConvBonsai graph (built once, then cached)

        Expected Dimensions
        -------------------
        X is [_, self.dDims]

        Returns (score, X_): score is the sum over all conv leaves of the
        Bonsai scores ([nClasses, _]); X_ is the sum over all conv leaves of
        (last branching vector of T . leaf features) * leaf probability, or
        zeros when the Bonsai trees have no internal node.
        '''
        errmsg = "Dimension Mismatch, X is [_, self.dataDimension]"
        assert (len(X.shape) == 2 and int(X.shape[1]) == self.dDims), errmsg

        # return the cached graph outputs if the graph was already built
        if self.score is not None:
            return self.score, self.X_

        sigmaI = tf.reshape(sigmaI, [1,1])

        Ximg = tf.reshape(X, [-1,self.d1,self.d2,self.channels])

        self.convs = []

        # conv-node probability list; probability of x passing through root is 1.
        self.__cnodeProb = []
        self.__cnodeProb.append(1)

        with tf.name_scope('ConvNode'+str(0)):
            convT = 0.1*tf.nn.leaky_relu(tf.nn.conv2d(Ximg,
                self.kernelsT[0],
                padding="VALID",
                strides = self.strides[0]), name = 'convT0')

            self.convs.append(convT)

            flatConv = tf.layers.Flatten()(convT)
            nFeat = int(flatConv.shape[1])
            self.wts.append(tf.Variable(tf.random_normal([1, nFeat]), name='wts' + str(0), dtype=tf.float32))
            self.bs.append(tf.Variable(tf.random_normal([1, 1]), name='bs' + str(0), dtype=tf.float32))

        fscore_ = None
        fX_ = None
        self.__nodeProbs = []

        for i in range(1,self.ctNodes):
            with tf.name_scope('ConvNode'+str(i)):

                parent_id = (i - 1) // 2

                convTprev = self.convs[parent_id]
                flatConvP = tf.layers.Flatten()(convTprev)

                # routing score of the parent node
                cscore = tf.multiply(sigmaI, tf.matmul(self.wts[parent_id], flatConvP, transpose_b = True) + self.bs[parent_id])# 1 x _

                # probability that x comes to this node given it is in the
                # parent node; the sign alternates between the two children
                cprob = tf.divide((1 + ((-1)**(i + 1))*tf.tanh(cscore)),2.0) # : scalar 1 x_
                # p(parent) * p(this | parent)
                cprob = self.__cnodeProb[parent_id] * cprob # : scalar 1 x _

                # adding prob to node prob list
                self.__cnodeProb.append(cprob)

                convT = 0.1*tf.nn.leaky_relu(tf.nn.conv2d(convTprev,
                    self.kernelsT[i],
                    padding="VALID",
                    strides = self.strides[i]), name = 'convT' + str(i))

                self.convs.append(convT)

                flatConv = tf.layers.Flatten()(convT)
                nFeat = int(flatConv.shape[1])

                self.wts.append(tf.Variable(tf.random_normal([1, nFeat]), name='wts' + str(i), dtype=tf.float32))
                self.bs.append(tf.Variable(tf.random_normal([1, 1]), name='bs' + str(i), dtype=tf.float32))

            if(i+1 > self.ciNodes):
                # conv leaf: its flattened output is the input of a Bonsai tree
                iinum = i - self.ciNodes

                # column of ones with the batch dimension of flatConv
                onesmat = flatConv[:,0:1]*0 + 1

                flat_imgs = tf.concat([flatConv, onesmat], axis = 1)

                X_ = tf.transpose(flat_imgs)

                # Bonsai node probability list; the Bonsai root is reached with
                # the probability of reaching this conv leaf
                tnodeProb = []
                tnodeProb.append(cprob)
                W_ = self.W[iinum, 0:(self.nClasses),:]# root W params : KxD^
                V_ = self.V[iinum, 0:(self.nClasses),:]# root V params : KxD^

                # score sum initialized to the root score (Note: can be negative)
                score_ = tnodeProb[0]*tf.multiply(tf.matmul(W_, X_), tf.tanh(self.sigma * tf.matmul(V_, X_))) # : Kx_
                self.scores.append(flat_imgs)

                # stays None when the Bonsai tree has no internal node (tDepth == 0)
                T_ = None

                for t in range(1, self.tNodes):
                    with tf.name_scope('BonNode'+str(i)+str(t)):
                        tparent = (t - 1) // 2

                        # W, V of the current Bonsai node
                        W_ = self.W[iinum,t * self.nClasses:((t + 1) * self.nClasses),:]# : KxD^
                        V_ = self.V[iinum,t * self.nClasses:((t + 1) * self.nClasses),:]# : KxD^

                        # parent node's branching vector reshaped to 1xD^
                        T_ = tf.reshape(self.T[iinum,tparent,:],[-1, self.pDims])# : 1xD^

                        # probability that x comes to this node given it is in the parent node
                        prob = tf.divide((1 + ((-1)**(t + 1))*tf.tanh(tf.multiply(sigmaI, tf.matmul(T_, X_)))),2.0) # : scalar 1x_

                        # p(parent)*p(this|parent)
                        prob = tnodeProb[tparent] * prob # : scalar 1x_

                        # adding prob to node prob list
                        tnodeProb.append(prob)
                        # new score added to the sum of scores
                        score_ += tnodeProb[t]*tf.multiply(tf.matmul(W_, X_), tf.tanh(self.sigma * tf.matmul(V_, X_))) # Kx_

                self.scores.append(score_)
                self.__nodeProbs.append(tnodeProb[1:])

                # projection term of this leaf; uses the branching vector of the
                # last visited Bonsai node's parent, zeros if there is none
                if T_ is None:
                    leafX_ = tf.zeros_like(cprob)
                else:
                    leafX_ = tf.matmul(T_, X_)*cprob

                if(fscore_ is None):
                    fscore_ = score_
                    fX_ = leafX_
                else:
                    fscore_ = fscore_ + score_
                    fX_ = fX_ + leafX_

        self.score = fscore_
        self.X_ = fX_
        self.nodeProb = tf.convert_to_tensor(self.__nodeProbs[:])
        self.cnodeProb = tf.convert_to_tensor(self.__cnodeProb[1:])
        self.layers = self.convs
        return self.score, self.X_

    def predict(self):
        '''
        Builds (once) and returns the tensor holding the integer class of each
        data point, derived from the score tensor.

        Raises RuntimeError when the graph has not been built yet.
        '''
        if self.prediction is not None:
            return self.prediction
        if self.score is None:
            raise RuntimeError("ConvBonsai graph has not been built; call the tree on X first")
        if self.nClasses > 2:
            self.prediction = tf.argmax(tf.transpose(self.score), 1) # score is 1xk
        else:
            self.prediction = tf.argmax(tf.concat([tf.transpose(self.score),0*tf.transpose(self.score)], 1), 1)
        return self.prediction

    def assert_params(self):
        '''
        Checks that W, V and T have the shapes this class creates and that the
        scalar settings are in range.  Z is not checked: it is a fixed [2, 2]
        variable that the forward pass does not use.
        '''
        def dims(param):
            # works for TF variables as well as plain arrays
            return [int(d) for d in param.shape]

        nLeaves = self.ctNodes - self.ciNodes
        wShape = dims(self.W)
        vShape = dims(self.V)
        tShape = dims(self.T)

        errRank = "W, V and T must have three dimensions shape = [convLeaves, a, b]"
        assert len(wShape) == 3 and len(vShape) == 3 and len(tShape) == 3, errRank
        msg = "W and V should be of same Dimensions"
        assert wShape == vShape, msg
        errW = "W and V are [convLeaves, numClasses*totalNodes, projectionDimension]"
        assert wShape == [nLeaves, self.nClasses * self.tNodes, self.pDims], errW
        errT = "T is [convLeaves, internalNodes, projectionDimension]"
        assert tShape == [nLeaves, self.iNodes, self.pDims], errT
        assert int(self.nClasses) > 0, "numClasses should be > 0"
        msg = "# of features in data should be > 0"
        assert int(self.dDims) > 0, msg
        msg = "Projection should be  > 0 dims"
        assert int(self.pDims) > 0, msg
        msg = "treeDepth should be >= 0"
        assert int(self.tDepth) >= 0, msg

In [11]:
class ConvBonsaiTrainer():
    """
    Builds the loss / accuracy / optimizer graph for a ConvBonsai tree and runs
    the training loop, optionally with iterative hard thresholding (IHT) of the
    W, V, Z and T parameters during the second half of training.
    """

    def __init__(self, tree, lW, lT, lV, lZ, lr, X, Y, sW, sV, sZ, sT):
        '''
        tree - ConvBonsai object; its graph is built here by calling it
        lW, lT, lV and lZ are regularisers to Bonsai Params...
        sW, sT, sV and sZ are sparsity factors to Bonsai Params (each in [0, 1])...
        lr - learningRate for optimizer...
        X is the Data Placeholder - Dims [_, dataDimension]
        Y - Label placeholder for loss computation - Dims [_, tree.nClasses]

        Loss: softmax cross entropy when tree.nClasses > 2, hinge loss otherwise.
        '''
        #  Intializations of training parameters
        self.tree = tree

        # regularization params lambdas(l) (all are scalars)
        self.lW = lW
        self.lV = lV
        self.lT = lT
        self.lZ = lZ

        # sparsity parameters (scalars all...) will be used to calculate percentiles to make other cells zero
        self.sW = sW
        self.sV = sV
        self.sT = sT
        self.sZ = sZ

        # placeholders for inputs and labels
        self.Y = Y # _ x nClasses
        self.X = X # _ x D

        # learning rate
        self.lr = lr

        # Asserting initialization
        self.assert_params()

        # place holder for path selection parameter sigmaI
        self.sigmaI = tf.placeholder(tf.float32, name='sigmaI')
        # invoking __call__ of tree getting the score and projected X tensors
        self.score, self.X_ = self.tree(self.X, self.sigmaI)
        # defining loss function tensorflow graph variables.....
        self.loss, self.marginLoss, self.regLoss = self.lossGraph()
        # defining single training step graph process ...
        self.tree.TrainStep = tf.train.AdamOptimizer(self.lr).minimize(self.loss)
        self.trainStep = self.tree.TrainStep
        # defining accuracy and prediction graph objects
        self.accuracy = self.accuracyGraph()
        self.prediction = self.tree.predict()

        # set all parameters above 0.99 if dont want to use IHT
        if self.sW > 0.99 and self.sV > 0.99 and self.sZ > 0.99 and self.sT > 0.99:
            self.isDenseTraining = True
        else:
            self.isDenseTraining = False

        # setting the hard thresholding graph obejcts
        self.hardThrsd()

    def hardThrsd(self):
        '''
        Set up for hard Thresholding Functionality: one placeholder per
        parameter plus the assign ops that write the thresholded values back.
        '''
        with tf.name_scope("IHT"):
            # place holders for sparse parameters....
            self.__Wth = tf.placeholder(tf.float32, name='Wth')
            self.__Vth = tf.placeholder(tf.float32, name='Vth')
            self.__Zth = tf.placeholder(tf.float32, name='Zth')
            self.__Tth = tf.placeholder(tf.float32, name='Tth')

            # assigning the thresholded values to params as a graph object for tensorflow....
            self.__Woph = self.tree.W.assign(self.__Wth)
            self.__Voph = self.tree.V.assign(self.__Vth)
            self.__Toph = self.tree.T.assign(self.__Tth)
            self.__Zoph = self.tree.Z.assign(self.__Zth)

            # grouping the graph objects as one object....
            self.hardThresholdGroup = tf.group(
                self.__Woph, self.__Voph, self.__Toph, self.__Zoph)

    def hardThreshold(self, A, s):
        '''
        Hard thresholding function on array A with sparsity s.

        Entries whose magnitude is below the (1 - s) * 100 percentile of |A|
        are set to zero.  A itself is not modified; a new array with the shape
        of A is returned.  An empty A is returned unchanged (as a copy).
        '''
        # copying to avoid modifying the caller's array, then flattening
        A_ = np.copy(A).ravel()
        if len(A_) > 0:
            magnitudes = np.abs(A_)
            percent = (1 - s) * 100.0
            # calculating the threshold value for sparse limit...
            try:
                th = np.percentile(magnitudes, percent, method='higher')
            except TypeError:
                # NumPy older than 1.22 only knows the `interpolation` keyword
                th = np.percentile(magnitudes, percent, interpolation='higher')
            # making sparse.......
            A_[magnitudes < th] = 0.0
        # reconstructing in actual shape....
        return A_.reshape(np.shape(A))

    def accuracyGraph(self):
        '''
        Accuracy Graph to evaluate accuracy when needed
        '''
        with tf.name_scope("ACC"):
            if (self.tree.nClasses > 2):
                correctPrediction = tf.equal(tf.argmax(tf.transpose(self.score), 1), tf.argmax(self.Y, 1))
                self.accuracy = tf.reduce_mean(tf.cast(correctPrediction, tf.float32))
            else:
                # labels mapped from {0, 1} to {-1, +1}; a prediction counts as
                # correct when score * label is positive
                y_ = self.Y * 2 - 1
                correctPrediction = tf.multiply(tf.transpose(self.score), y_)
                correctPrediction = tf.nn.relu(correctPrediction)
                correctPrediction = tf.ceil(tf.tanh(correctPrediction)) # final predictions.... round to(0 or 1)
                self.accuracy = tf.reduce_mean(
                    tf.cast(correctPrediction, tf.float32))

        return self.accuracy

    def lossGraph(self):
        '''
        Loss Graph for given tree.

        Returns (loss, marginLoss, regLoss) with loss = marginLoss + regLoss.
        '''
        with tf.name_scope("Loss"):
            # regularization losses.....
            self.regLoss = 0.5 * (self.lZ * tf.square(tf.norm(self.tree.Z)) +
                              self.lW * tf.square(tf.norm(self.tree.W)) +
                              self.lV * tf.square(tf.norm(self.tree.V)) +
                              self.lT * tf.square(tf.norm(self.tree.T)))

            # routing weights of the internal conv nodes, weighted with lT
            llen = self.tree.ciNodes
            var = 0
            for i in range(llen):
                var = var +  self.lT * tf.square(tf.norm(self.tree.wts[i]))

            self.regLoss = self.regLoss + var

            # emperical actual loss.....
            if (self.tree.nClasses > 2):
                # Cross Entropy loss for MultiClass case in joint training for
                # faster convergence
                self.marginLoss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits_v2(logits=tf.transpose(self.score),
                                                                   labels=tf.stop_gradient(self.Y)))
            else:
                # hinge loss on labels mapped to {-1, +1}
                self.marginLoss = tf.reduce_mean(tf.nn.relu(1.0 - (2 * self.Y - 1) * tf.transpose(self.score)))

            # adding the losses...
            self.loss = self.marginLoss + self.regLoss
        return self.loss, self.marginLoss, self.regLoss

    def assert_params(self):
        '''
        Checks the sparsity factors and the label placeholder shape.
        '''
        err = "sparsity must be between 0 and 1"
        assert self.sW >= 0 and self.sW <= 1, "W " + err
        assert self.sV >= 0 and self.sV <= 1, "V " + err
        assert self.sZ >= 0 and self.sZ <= 1, "Z " + err
        assert self.sT >= 0 and self.sT <= 1, "T " + err
        errMsg = "Dimension Mismatch, Y has to be [_, " + str(self.tree.nClasses) + "]"
        errCont = " numClasses are 1 in case of Binary case by design"
        assert (len(self.Y.shape) == 2 and self.Y.shape[1] == self.tree.nClasses), errMsg + errCont

    def train(self, batchSize, totalEpochs, sess, Xtrain, Xval, Ytrain, Yval, saver, filename,valsig):
        '''
        Runs totalEpochs epochs of mini-batch training inside session `sess`.

        batchSize : samples per batch; samples beyond the last full batch are
                    not used
        Xtrain, Ytrain : training data [_, D] and labels [_, nClasses]
        Xval, Yval : data evaluated after every epoch with sigmaI = 1e9
        saver, filename : the best model so far (by that accuracy) is saved
                          to filename + "/model_best"
        valsig : initial value of sigmaI

        Raises ValueError when Xtrain holds fewer samples than batchSize.
        '''
        iht = 0 # to keep a note if thresholding has been started ...
        numIters = Xtrain.shape[0] / batchSize # number of batches per epoch (fractional)
        totalBatches = numIters * totalEpochs # total number of batch operations...
        treeSigmaI = valsig # controls the fidelity of the approximation too high can saturate tanh.

        numIters = int(numIters)
        if numIters < 1:
            raise ValueError("batchSize (%s) is larger than the number of training samples (%s)"
                             % (batchSize, Xtrain.shape[0]))

        maxTestAcc = -10000
        maxTestAccEpoch = -1
        testAcc = None
        itersInPhase = 0

        for i in range(totalEpochs):
            print("\nEpoch Number: " + str(i))
            # running sums of the batch accuracy and loss of this epoch
            trainAcc = 0.0
            trainLoss = 0.0

            for j in range(numIters):
                # creating batch.....sequentiall could be done randomly using choice function...
                mini_batchX = Xtrain[j*batchSize:(j+1)*batchSize,:] # B x D
                mini_batchY = Ytrain[j*batchSize:(j+1)*batchSize] # B x

                # feed for training using tensorflow graph based gradient descent approach......
                _feed_dict = {self.X: mini_batchX, self.Y: mini_batchY,
                                  self.sigmaI: treeSigmaI}

                # training the tensorflow graph
                _, batchLoss, batchAcc = sess.run(
                    [self.trainStep, self.loss, self.accuracy],
                    feed_dict=_feed_dict)

                # calculating acc....
                trainAcc += batchAcc
                trainLoss += batchLoss

                # to update sigmaI.....
                if ((itersInPhase) % 100 == 0):

                    # Making a random batch....
                    indices = np.random.choice(Xtrain.shape[0], 100)
                    rand_batchX = Xtrain[indices, :]

                    _feed_dict = {self.X: rand_batchX,
                                  self.sigmaI: treeSigmaI}
                    # Projected matrix, evaluated in the session passed by the caller
                    Xcapeval = sess.run(self.X_, feed_dict=_feed_dict)

                    if(self.tree.iNodes > 0):
                        # inverse of the summed magnitude of the projection
                        sum_tr = 1 / np.sum(np.abs(Xcapeval))
                    else:
                        sum_tr = 0.1
                    # capped at 1000; grows with the fraction of batches done
                    sum_tr = min(
                        1000, sum_tr * (2**(float(itersInPhase) /
                                            (float(totalBatches) )))*valsig/30)
                    # sigmaI never decreases
                    treeSigmaI = max(sum_tr, treeSigmaI)

                itersInPhase+=1

                # to start hard thresholding after half_time(could vary) ......
                if((itersInPhase//numIters > (1/2)*totalEpochs) and (not self.isDenseTraining)):
                    if(iht == 0):
                        print('\n\nHard Thresolding Started\n\n')
                        iht = 1

                    # getting the current estimates of  W,V,Z,T...
                    currW, currV, currZ, currT = sess.run(
                        [self.tree.W, self.tree.V, self.tree.Z, self.tree.T])

                    # Setting a method to make some values of matrix zero....
                    self.__thrsdW = self.hardThreshold(currW, self.sW)
                    self.__thrsdV = self.hardThreshold(currV, self.sV)
                    self.__thrsdZ = self.hardThreshold(currZ, self.sZ)
                    self.__thrsdT = self.hardThreshold(currT, self.sT)

                    # runnign the hard thresholding graph....
                    fd_thrsd = {self.__Wth: self.__thrsdW, self.__Vth: self.__thrsdV,
                                self.__Zth: self.__thrsdZ, self.__Tth: self.__thrsdT}
                    sess.run(self.hardThresholdGroup, feed_dict=fd_thrsd)

            print("Train Loss: " + str(trainLoss / numIters) +
                  " Train accuracy: " + str(trainAcc / numIters))
            print("SigmaI :",treeSigmaI,":LR:",self.lr)

            # calculating the test accuracies with sigmaI as expected -> inf.. = 10^9
            oldSigmaI = treeSigmaI
            treeSigmaI = 1e9

            # test feed for tf...
            _feed_dict = {self.X: Xval, self.Y: Yval,
                                  self.sigmaI: treeSigmaI}

            # calculating losses....
            testAcc, testLoss, regTestLoss = sess.run([self.accuracy, self.loss, self.regLoss], feed_dict=_feed_dict)

            # keep a checkpoint of the best model seen so far
            if maxTestAcc <= testAcc:
                maxTestAccEpoch = i
                maxTestAcc = testAcc
                saver.save(sess, filename + "/model_best")

            print("Test accuracy %g" % testAcc)
            print("MarginLoss + RegLoss: " + str(testLoss - regTestLoss) +
                  " + " + str(regTestLoss) + " = " + str(testLoss) + "\n", end='\r')

            # back to the training value of sigmaI for the next epoch
            treeSigmaI = oldSigmaI

        # sigmaI has to be set to infinity to ensure
        # only a single path is used in inference
        treeSigmaI = 1e9
        print("\nMaximum Test accuracy at compressed" +
              " model size(including early stopping): " +
              str(maxTestAcc) + " at Epoch: " +
              str(maxTestAccEpoch + 1) + "\nFinal Test" +
              " Accuracy: " + str(testAcc))

In [19]:
"""
kernelsshp : kernel sizes for convolutional filters at each level in tree
strides : strides for convolutional layers at each level in tree
tDepth : bonsai tree depth after conv tree ends
cDepth : depth on convolutional tree
ch : number of channels in image
lW, lT, lV, lZ : regularization params
lr : learning rate
sZ,sW,sV,sT : sparsity constraints on params Z,W,V,T
"""
tf.reset_default_graph()
print("Creating the model graph and training graph..")

# One kernel shape / stride list per level of the convolutional tree.
kernelsshp = [
    [4, 4, 3, 3],
    [4, 4, 3, 2],
    [3, 3, 2, 1],
    [2, 2, 5, 1],
    [3, 3, 1, 1],
    [3, 3, 1, 1],
    [3, 3, 1, 1],
]
strides = [[1, 2, 2, 1] for _ in range(2)] + [[1, 1, 1, 1] for _ in range(5)]

# Hyper-parameters taken from the parsed arguments.
cdepth, tdepth = args.cdepth, args.tdepth
reg, sty, lrm = args.reg, args.sty, args.lr

# Model: convolutional tree with a Bonsai tree on every conv leaf.
tree = ConvBonsai(
    nClasses=nClasses,
    dDims=dDims,
    pDims=28,
    tDepth=tdepth,
    sigma=1,
    kernelsshp=kernelsshp,
    strides=strides,
    cDepth=cdepth,
    ch=3,
)

# Input placeholders: flattened images and one-hot labels.
X = tf.placeholder("float32", [None, dDims])
Y = tf.placeholder("float32", [None, nClasses])

# Trainer: the same regularization weight and the same sparsity factor are
# used for all four parameter groups.
regularizers = dict(lW=reg, lT=reg, lV=reg, lZ=reg)
sparsities = dict(sZ=sty, sW=sty, sV=sty, sT=sty)
bonsaiTrainer = ConvBonsaiTrainer(tree, lr=lrm, X=X, Y=Y, **regularizers, **sparsities)

init_op = tf.global_variables_initializer()
print("Done Creating the graphs.")

Creating the model graph and training graph..
Done Creating the graphs.


In [20]:
print("Restoring/Initializing the model state...")
directory = "./bonsaiconv"
filename = directory + "/model"  #filename to save model

# Create the checkpoint directory if it is missing (no error if it exists).
os.makedirs(directory, exist_ok=True)

with tf.name_scope('hidden') as scope:
    with tf.Session() as sess:
        saver = tf.train.Saver()
        try:
            saver.restore(sess, filename)
        except Exception as restore_err:
            # No usable checkpoint: report why and start from fresh initial
            # values. Catching Exception (not a bare except) lets
            # KeyboardInterrupt / SystemExit propagate.
            print("Could not restore '" + filename + "' (" + type(restore_err).__name__ +
                  "); initializing variables instead.")
            sess.run(init_op)
            
###   __ uncomment if using tensorboard__
#         writer = tf.summary.FileWriter('convbonsai')
#         writer.add_graph(sess.graph)
        
        saver.save(sess, filename)
        totalEpochs = 10
        # at least 1000 samples per batch
        batchSize = np.maximum(1000, int(np.ceil(np.sqrt(Ytrain.shape[0]))))
        # five training sequences of totalEpochs epochs each, with a numbered
        # checkpoint after every sequence
        for i in range(5):
            bonsaiTrainer.train(batchSize, totalEpochs, sess, Xtrain, Xtest, Ytrain, Ytest, saver, filename,1)
            saver.save(sess, filename + str(i))
            print("Done sequence ", i)

Restoring/Initializing the model state...

Epoch Number: 0
Train Loss: 2.2191270446777343 Train accuracy: 0.164200000166893
SigmaI : 1 :LR: 0.01
Test accuracy 0.1588
MarginLoss + RegLoss: 2.3659592 + 0.00397164 = 2.3699307

Epoch Number: 1
Train Loss: 1.9791626977920531 Train accuracy: 0.26801999926567077
SigmaI : 1 :LR: 0.01
Test accuracy 0.2406
MarginLoss + RegLoss: 2.1238356 + 0.0038187152 = 2.1276543

Epoch Number: 2
Train Loss: 1.8547274899482726 Train accuracy: 0.3238200014829636
SigmaI : 1 :LR: 0.01
Test accuracy 0.2657
MarginLoss + RegLoss: 2.093099 + 0.003893574 = 2.0969927

Epoch Number: 3
Train Loss: 1.8013080430030823 Train accuracy: 0.34439999997615817
SigmaI : 1 :LR: 0.01
Test accuracy 0.2907
MarginLoss + RegLoss: 2.0603678 + 0.00400654 = 2.0643744

Epoch Number: 4
Train Loss: 1.7617854833602906 Train accuracy: 0.36010000050067903
SigmaI : 1 :LR: 0.01
Test accuracy 0.3078
MarginLoss + RegLoss: 2.0451021 + 0.0041098446 = 2.049212

Epoch Number: 5
Train Loss: 1.732034952640

W0820 20:16:05.446081 139896027801408 deprecation.py:323] From /home/abhikcr/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py:960: remove_checkpoint (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to delete files with this prefix.


Test accuracy 0.3269
MarginLoss + RegLoss: 2.858096 + 0.0058098254 = 2.8639057

Maximum Test accuracy at compressed model size(including early stopping): 0.336 at Epoch: 2
Final Test Accuracy: 0.3269
Done sequence  3

Epoch Number: 0
Train Loss: 1.4832683396339417 Train accuracy: 0.46901999950408935
SigmaI : 1 :LR: 0.01
Test accuracy 0.3265
MarginLoss + RegLoss: 2.8746254 + 0.0058524865 = 2.880478

Epoch Number: 1
Train Loss: 1.4802012300491334 Train accuracy: 0.4698999977111816
SigmaI : 1 :LR: 0.01
Test accuracy 0.3261
MarginLoss + RegLoss: 2.8892198 + 0.005896507 = 2.8951163

Epoch Number: 2
Train Loss: 1.47764310836792 Train accuracy: 0.4710599988698959
SigmaI : 1 :LR: 0.01
Test accuracy 0.3257
MarginLoss + RegLoss: 2.9106605 + 0.0059384 = 2.9165988

Epoch Number: 3
Train Loss: 1.4750825381278991 Train accuracy: 0.4724400007724762
SigmaI : 1 :LR: 0.01
Test accuracy 0.3259
MarginLoss + RegLoss: 2.9208748 + 0.00598032 = 2.926855

Epoch Number: 4
Train Loss: 1.4727976417541504 Train ac

### Analysis

Checking Sparsity 

In [14]:
def calc_zero_ratios(tree, tol=1e-15):
    """Count and report the non-zero entries of the tree's parameter tensors.

    Despite its name, this function reports *counts* of non-zero entries,
    not ratios.

    Args:
        tree: object exposing ``Z``, ``W``, ``V`` and ``T`` attributes, each
            providing an ``eval()`` method that returns an array. ``eval()``
            is called without arguments, so a default session is presumably
            required -- confirm against the caller.
        tol: magnitude above which an entry is counted as non-zero. The
            default matches the threshold that was previously hard-coded.

    Returns:
        The combined non-zero count of ``W``, ``V`` and ``T``. The count for
        ``Z`` is printed but is not part of the returned total.
    """
    def _nnz(tensor):
        # Number of entries whose magnitude exceeds the tolerance.
        return np.sum(np.abs(tensor.eval()) > tol)

    zs = _nnz(tree.Z)
    ws = _nnz(tree.W)
    vs = _nnz(tree.V)
    ts = _nnz(tree.T)
    print('Number of non zeros achieved...\nW:',ws,'\nV:',vs,'\nT:',ts,'\nZ:',zs)
    return ws + vs + ts

In [15]:
# Evaluation subset: the first 10000 entries of Xtrain along its leading axis.
image = Xtrain[:10000]

In [18]:
# Restore the saved checkpoint, report parameter sparsity and time one forward pass.
with tf.Session() as sess:
    saver = tf.train.Saver()
    saver.restore(sess, filename)

    # Combined non-zero count of W, V and T (Z is printed but not included).
    var = calc_zero_ratios(tree)

    # NOTE(review): if `image` still holds raw uint8 pixel values, `image*255`
    # wraps around before the cast to int -- confirm that Xtrain was rescaled
    # (e.g. to [0, 1]) earlier in the notebook.
    _feed_dict = {bonsaiTrainer.X:(image*255).astype(int).reshape(-1,dDims),bonsaiTrainer.sigmaI:float(1)}

    # Wall-clock time of a single sess.run over the whole evaluation subset.
    start = time.time()
    val = sess.run([tree.prediction, tree.wts, tree.bs, tree.convs, tree.kernelsT, tree.cnodeProb], feed_dict=_feed_dict)
    end = time.time()

    # Count entries by magnitude so that negative values are treated as
    # non-zero too, consistent with calc_zero_ratios.
    nonzero_tol = 0.0000000001
    size = 0
    for i in range(len(val[1])):
        size += np.sum(np.abs(val[1][i]) > nonzero_tol)
        size += np.sum(np.abs(val[2][i]) > nonzero_tol)

    print('Number of non_zero paramters : ',var + size,' Time taken : ', end-start)

Sparse ratios achieved...
W: 4760 
V: 4760 
T: 204 
Z: 4


InvalidArgumentError: input and filter must have the same depth: 3 vs 1
	 [[node ConvNode0/Conv2D (defined at <ipython-input-10-92afeeeed7e0>:149) ]]

Errors may have originated from an input operation.
Input Source operations connected to node ConvNode0/Conv2D:
 Reshape_1 (defined at <ipython-input-10-92afeeeed7e0>:135)	
 kernelT0/read (defined at <ipython-input-10-92afeeeed7e0>:88)

Original stack trace for 'ConvNode0/Conv2D':
  File "/home/abhikcr/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/abhikcr/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "/home/abhikcr/anaconda3/lib/python3.6/asyncio/base_events.py", line 438, in run_forever
    self._run_once()
  File "/home/abhikcr/anaconda3/lib/python3.6/asyncio/base_events.py", line 1451, in _run_once
    handle._run()
  File "/home/abhikcr/anaconda3/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 787, in inner
    self.run()
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 748, in run
    yielded = self.gen.send(value)
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 365, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 272, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 542, in execute_request
    user_expressions, allow_stdin,
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2855, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2881, in _run_cell
    return runner(coro)
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3058, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3249, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3326, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-12-8d9fd02fc719>", line 21, in <module>
    sZ = 0.995, sW = 0.995, sV = 0.995, sT = 0.995)
  File "<ipython-input-11-28d7b6900192>", line 44, in __init__
    self.score, self.X_ = self.tree(self.X, self.sigmaI)
  File "<ipython-input-10-92afeeeed7e0>", line 149, in __call__
    strides = self.strides[0]), name = 'convT0')
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 1953, in conv2d
    name=name)
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 1071, in conv2d
    data_format=data_format, dilations=dilations, name=name)
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3616, in create_op
    op_def=op_def)
  File "/home/abhikcr/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2005, in __init__
    self._traceback = tf_stack.extract_stack()


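Checking the categorization effect: group the predictions by which node-probability entries round to 1, then count the predicted classes in each group.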

In [None]:
# Buckets of predictions, one per pair of node-probability entries that round to 1.
LL, LR, RL, RR = [], [], [], []

# (first entry, second entry, destination bucket), tested in this order; the
# first pair whose two entries both round to 1 receives the prediction.
# NOTE(review): the bucket names suggest left/right routing through a
# depth-2 tree -- confirm the node numbering against the tree definition.
_routes = ((0, 2, LL), (0, 3, LR), (1, 4, RL), (1, 5, RR))

for sample_idx in range(len(val[0])):
    # val[-1] is the last fetched tensor (tree.cnodeProb); round this
    # sample's slice once and reuse it for every test below.
    node_on = np.round(val[-1][:, :, sample_idx])
    for first, second, bucket in _routes:
        if node_on[first] == 1 and node_on[second] == 1:
            bucket.append(val[0][sample_idx])
            break

In [5]:
# With IPython's default settings a cell displays only its last expression,
# so the per-bucket (unique values, counts) results are gathered into a
# single dict keyed by bucket name in order to show all four of them.
{
    name: np.unique(bucket, return_counts=True)
    for name, bucket in (("RL", RL), ("LL", LL), ("RR", RR), ("LR", LR))
}

NameError: name 'RL' is not defined