# Setup/Imports

In [1]:
import sys
import os
from pylab import *
%matplotlib inline

* Import Caffe using 'caffe_root'

In [2]:
caffe_root = '/Users/Kallie/caffe'

sys.path.insert(0, '/Users/Kallie/anaconda3/envs/final/lib')

# Add path for python layers
sys.path.insert(0, os.path.join(caffe_root, 'python', 'caffe', 'layers'))

# caffe_root has no trailing slash, so plain string concatenation would yield
# '/Users/Kallie/caffepython'; os.path.join inserts the separator.
sys.path.insert(0, os.path.join(caffe_root, 'python'))
import caffe

# Build LeNet Architecture for MNIST

* LeNet no Dropout

In [3]:
from caffe import layers as L, params as P

def lenet_sparse(lmdb, batch_size):
    """Describe the plain LeNet (no dropout) and return ``NetSpec.to_proto()``.

    lmdb: path passed to the Data layer as its LMDB ``source``.
    batch_size: batch size of the Data layer.
    """
    xavier = dict(type='xavier')  # weight filler shared by every learnable layer
    net = caffe.NetSpec()

    # Data layer with two tops (images and labels); inputs are scaled by 1/255.
    net.data, net.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=lmdb,
                                 transform_param=dict(scale=1./255), ntop=2)

    # Two conv (5x5) + max-pool (2x2, stride 2) stages with 20 and 50 filters.
    net.conv1 = L.Convolution(net.data, kernel_size=5, num_output=20, weight_filler=xavier)
    net.pool1 = L.Pooling(net.conv1, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    net.conv2 = L.Convolution(net.pool1, kernel_size=5, num_output=50, weight_filler=xavier)
    net.pool2 = L.Pooling(net.conv2, kernel_size=2, stride=2, pool=P.Pooling.MAX)

    # Fully connected head: 500 hidden units with in-place ReLU, then 10 scores.
    net.fc1 = L.InnerProduct(net.pool2, num_output=500, weight_filler=xavier)
    net.relu1 = L.ReLU(net.fc1, in_place=True)
    net.score = L.InnerProduct(net.relu1, num_output=10, weight_filler=xavier)
    net.loss = L.SoftmaxWithLoss(net.score, net.label)

    return net.to_proto()
    
# Write the train (batch size 64) and test (batch size 100) definitions of the
# plain LeNet to their prototxt files.
for proto_path, lmdb_path, batch in (
        ('mnist/lenet_sparse_train.prototxt', 'mnist/mnist_train_lmdb', 64),
        ('mnist/lenet_sparse_test.prototxt', 'mnist/mnist_test_lmdb', 100)):
    with open(proto_path, 'w') as f:
        f.write(str(lenet_sparse(lmdb_path, batch)))

* LeNet with All Vanilla Dropout

In [4]:
from caffe import layers as L, params as P

def lenet_all_vanilla_sparse(lmdb, batch_size):
    """Describe LeNet with a standard Dropout layer after each convolution.

    Returns ``NetSpec.to_proto()``. Note that this variant uses 40 and 100
    convolution filters, and both dropout layers use ``dropout_ratio=0.5``.

    lmdb: path passed to the Data layer as its LMDB ``source``.
    batch_size: batch size of the Data layer.
    """
    xavier = dict(type='xavier')  # weight filler shared by every learnable layer
    net = caffe.NetSpec()

    # Data layer with two tops (images and labels); inputs are scaled by 1/255.
    net.data, net.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=lmdb,
                                 transform_param=dict(scale=1./255), ntop=2)

    # Stage 1: conv -> dropout -> max-pool.
    net.conv1 = L.Convolution(net.data, kernel_size=5, num_output=40, weight_filler=xavier)
    net.drop1 = L.Dropout(net.conv1, dropout_ratio=0.5)
    net.pool1 = L.Pooling(net.drop1, kernel_size=2, stride=2, pool=P.Pooling.MAX)

    # Stage 2: conv -> dropout -> max-pool.
    net.conv2 = L.Convolution(net.pool1, kernel_size=5, num_output=100, weight_filler=xavier)
    net.drop2 = L.Dropout(net.conv2, dropout_ratio=0.5)
    net.pool2 = L.Pooling(net.drop2, kernel_size=2, stride=2, pool=P.Pooling.MAX)

    # Fully connected head: 500 hidden units with in-place ReLU, then 10 scores.
    net.fc1 = L.InnerProduct(net.pool2, num_output=500, weight_filler=xavier)
    net.relu1 = L.ReLU(net.fc1, in_place=True)
    net.score = L.InnerProduct(net.relu1, num_output=10, weight_filler=xavier)
    net.loss = L.SoftmaxWithLoss(net.score, net.label)

    return net.to_proto()
    
# Write the train (batch size 64) and test (batch size 100) definitions of the
# vanilla-dropout LeNet to their prototxt files.
for proto_path, lmdb_path, batch in (
        ('mnist/lenet_all_vanilla_sparse_auto_train.prototxt', 'mnist/mnist_train_lmdb', 64),
        ('mnist/lenet_all_vanilla_sparse_auto_test.prototxt', 'mnist/mnist_test_lmdb', 100)):
    with open(proto_path, 'w') as f:
        f.write(str(lenet_all_vanilla_sparse(lmdb_path, batch)))

* LeNet With All Modified Dropout

In [5]:
from caffe import layers as L, params as P

def lenet_all_mod_sparse(lmdb, batch_size):
    """Describe LeNet with the custom Python dropout layer after each convolution.

    Both dropout layers are ``Python`` layers that load ``Dropout_M_Layer``
    from the ``Dropout_M`` module, so that module must be importable when the
    net is instantiated. Returns ``NetSpec.to_proto()``.

    lmdb: path passed to the Data layer as its LMDB ``source``.
    batch_size: batch size of the Data layer.
    """
    n = caffe.NetSpec()

    # Data layer with two tops (images and labels); inputs are scaled by 1/255.
    n.data, n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=lmdb,
                             transform_param=dict(scale=1./255), ntop=2)

    n.conv1 = L.Convolution(n.data, kernel_size=5, num_output=20, weight_filler=dict(type='xavier'))
    n.drop1 = L.Python(n.conv1, name='drop1', ntop=1, python_param={'module': 'Dropout_M',
                           'layer': 'Dropout_M_Layer'})
    n.pool1 = L.Pooling(n.drop1, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.conv2 = L.Convolution(n.pool1, kernel_size=5, num_output=50, weight_filler=dict(type='xavier'))
    # The second dropout layer gets its own name; it was previously also
    # called 'drop1', which produced two layers with the same name.
    n.drop2 = L.Python(n.conv2, name='drop2', ntop=1, python_param={'module': 'Dropout_M',
                           'layer': 'Dropout_M_Layer'})
    n.pool2 = L.Pooling(n.drop2, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.fc1 =   L.InnerProduct(n.pool2, num_output=500, weight_filler=dict(type='xavier'))
    n.relu1 = L.ReLU(n.fc1, in_place=True)
    n.score = L.InnerProduct(n.relu1, num_output=10, weight_filler=dict(type='xavier'))
    n.loss =  L.SoftmaxWithLoss(n.score, n.label)

    return n.to_proto()
    
# Write the train (batch size 64) and test (batch size 100) definitions of the
# modified-dropout LeNet to their prototxt files.
for proto_path, lmdb_path, batch in (
        ('mnist/lenet_all_mod_sparse_auto_train.prototxt', 'mnist/mnist_train_lmdb', 64),
        ('mnist/lenet_all_mod_sparse_auto_test.prototxt', 'mnist/mnist_test_lmdb', 100)):
    with open(proto_path, 'w') as f:
        f.write(str(lenet_all_mod_sparse(lmdb_path, batch)))

# Load the Solver
* Change Solver based on LeNet You Want to Run

In [6]:
caffe.set_mode_cpu()

# Release any solver left from a previous run of this cell before building a
# new one (workaround: two solvers can't be instantiated on the same lmdb data).
solver = None

# Solver definition for the LeNet variant to run; the available choices are
#   'mnist/lenet_sparse_solver.prototxt'
#   'mnist/lenet_all_mod_sparse_auto_solver.prototxt'
solver_prototxt = 'mnist/lenet_all_mod_sparse_auto_solver.prototxt'

# Loading the solver also creates the train and test nets.
solver = caffe.SGDSolver(solver_prototxt)

# Sanity Checks
* output layer sizes:

In [7]:
# (blob name, shape) for every blob of the train net;
# each shape reads as (batch size, feature dim, spatial dim)
[(blob_name, blob.data.shape) for blob_name, blob in solver.net.blobs.items()]

[('data', (64, 1, 28, 28)),
 ('label', (64,)),
 ('conv1', (64, 20, 24, 24)),
 ('drop1', (64, 20, 24, 24)),
 ('pool1', (64, 20, 12, 12)),
 ('conv2', (64, 50, 8, 8)),
 ('drop2', (64, 50, 8, 8)),
 ('pool2', (64, 50, 4, 4)),
 ('fc1', (64, 500)),
 ('score', (64, 10)),
 ('loss', ())]

In [8]:
# (layer name, weight shape) for every parameterised layer of the train net;
# only entry 0 (the weights) of each parameter list is shown, biases are omitted
[(layer_name, layer_params[0].data.shape)
 for layer_name, layer_params in solver.net.params.items()]

[('conv1', (20, 1, 5, 5)),
 ('conv2', (50, 20, 5, 5)),
 ('fc1', (500, 800)),
 ('score', (10, 500))]

* Check that the loss looks correct and that the data is loaded

In [None]:
# Run one forward pass through the training net.
solver.net.forward()  # train net
# Run one forward pass through the first test net; as the last expression of
# the cell, its return value is what the notebook displays.
solver.test_nets[0].forward()  # test net (there can be more than one)

  eps = sigmoid(np.log(data**2))


{'loss': array(3.1977205, dtype=float32)}

# Training Loop

In [None]:
%%time
niter =  2000
test_interval = 100
ep = 1
mod_list = []
# losses will also be stored in the log
train_loss = zeros(niter)
test_acc = zeros(int(np.ceil(niter / test_interval)))
output = zeros((niter, 8, 10))

# the main solver loop
for it in range(niter):
    solver.step(1)  # SGD by Caffe
    
    # store the train loss
    train_loss[it] = solver.net.blobs['loss'].data
    
    # store the output on the first test batch
    # (start the forward pass at conv1 to avoid loading new data)
    solver.test_nets[0].forward(start='conv1')
    output[it] = solver.test_nets[0].blobs['score'].data[:8]
    
    # run a full test every so often
    # (Caffe can also do this for us and write to a log, but we show here
    #  how to do it directly in Python, where more complicated things are easier.)
    if it % test_interval == 0:
        #print('Iteration', it, 'testing...')
        correct = 0
        for test_it in range(100):
            solver.test_nets[0].forward()
            correct += sum(solver.test_nets[0].blobs['score'].data.argmax(1)
                           == solver.test_nets[0].blobs['label'].data)
        test_acc[it // test_interval] = correct / 1e4
     
    
    if it % 156 == 0:

        layer1  = solver.net.blobs['drop1'].data.copy()
        layer2  = solver.net.blobs['drop2'].data.copy()
        
        print(solver.net.blobs['drop1'].items())
        
        layer1[abs(layer1)<1e-9] = 0
        layer2[abs(layer2)<1e-9] = 0
        
        layer1_not_zero = np.count_nonzero(layer1)
        layer2_not_zero = np.count_nonzero(layer2)
        
        nom = (np.prod(layer1.shape) + np.prod(layer2.shape))
        denom = (layer1_not_zero + layer2_not_zero)
        
        sparse = nom/denom
        
        print('Sparsity of Epoch ' + str(ep)+ ' is: ' + str(sparse))
        mod_list.append(sparse)
        ep += 1
mod = np.mean(mod_list)
        

  eps = sigmoid(np.log(data**2))


# Graph Sparsity

In [None]:
_, ax1 = subplots()
print(mod)
# x positions shared by all curves (scaled number of neurons)
scales = [.25, .5, 1, 1.5]
# Sparsity of the modified-dropout net at each scale. Presumably meant to be
# filled in by hand from the `mod` value printed by each run -- TODO confirm.
mods = []
lenets = [1,1,1,1]
vanillas = [2.0783538235023276, 2.0784431406696475, 2.0778395625890305, 2.0788826576561448]
# Plotting an incomplete `mods` list against four x values would raise a
# shape-mismatch error, so draw that curve only once it has one value per scale.
if len(mods) == len(scales):
    ax1.plot(scales, mods, 'b+-', label='Modified Dropout')
else:
    print('Skipping Modified Dropout curve: mods has', len(mods),
          'values, expected', len(scales))
# Plot the recorded sparsities; the baselines were previously drawn as y = x
# and the `lenets` / `vanillas` lists were never used.
ax1.plot(scales, lenets, 'c+-', label='No Dropout')
ax1.plot(scales, vanillas, 'g+-', label='Vanilla Dropout')
ax1.set_xlabel('Scaled Number of neurons')
ax1.legend()
ax1.set_ylabel('Sparsity')


#savefig('mnist/figures/sparsity_mnist.jpg')
