In [1]:
# Environment setup: change the working directory and make the local pycaffe
# build importable before importing caffe.
import os
import numpy as np
# NOTE(review): hard-coded absolute path; it is not referenced again in the
# visible cells.
caffe_root = '/home/flathead/bitbucket/mycaffe/'
# Move two directories up; the relative 'python' and 'examples/mnist/...' paths
# used below are resolved against this new working directory.
# NOTE(review): a relative chdir is not idempotent - re-running this cell moves
# up two more levels.
os.chdir('../../')
import sys
# Put the 'python' directory (relative to the new cwd) first on the import path
# so that `import caffe` picks it up.
sys.path.insert(0, 'python')
import caffe

In [2]:
from pylab import *
%matplotlib inline

In [3]:
import matplotlib.pyplot as plt
from numpy.random import normal

In [4]:
# Run Caffe in GPU mode and select device 0.
caffe.set_mode_gpu()
caffe.set_device(0)

In [5]:
# Build the SGD solver from the LeNet solver definition. The solver owns the
# training net and the test net(s) used in the following cells.
solver = None  # ignore this workaround for lmdb data (can't instantiate two solvers on the same data)
solver = caffe.SGDSolver('examples/mnist/lenet_solver.prototxt')

In [6]:
# Shorthand handles: the solver's training net and its first test net.
net1 = solver.net
test_net = solver.test_nets[0]

In [7]:
# List every blob of the training net together with the shape of its data
# (batch size first, then feature dim, then spatial dims where present).
[(blob_name, blob.data.shape) for blob_name, blob in net1.blobs.items()]

[('data', (64, 1, 28, 28)),
 ('label', (64,)),
 ('conv2', (64, 50, 24, 24)),
 ('pool2', (64, 50, 12, 12)),
 ('ip1', (64, 500)),
 ('ip2', (64, 10)),
 ('loss', ())]

In [8]:
# Shapes of the weight arrays only: index 0 of each layer's params is the
# weight blob (the biases are left out).
[(layer_name, layer_params[0].data.shape) for layer_name, layer_params in net1.params.items()]

[('conv2', (50, 1, 5, 5)), ('ip1', (500, 7200)), ('ip2', (10, 500))]

In [9]:
# One forward pass of the test net before any training; the displayed result
# is the net's output dict (accuracy and loss for that single batch).
test_net.forward()

{'accuracy': array(0.07999999821186066, dtype=float32),
 'loss': array(2.336747884750366, dtype=float32)}

In [10]:
# Weight arrays (params index 0) of the two inner-product layers.
# NOTE(review): later cells edit feat1/feat2 in place and then call net1.save(),
# so these are presumably views onto the net's own parameter memory - confirm.
feat1 = net1.params['ip1'][0].data
feat2 = net1.params['ip2'][0].data

In [11]:
# Number of output neurons (rows of the weight matrix) of ip1 and ip2.
# Read from the weight arrays instead of hard-coding 500 / 10, so the
# per-neuron loops below stay correct if the net definition changes.
l1 = feat1.shape[0]
l2 = feat2.shape[0]

In [12]:
%%time
niter = 25000
test_interval = 5000
train_loss = zeros(niter)
test_acc = zeros(int(np.ceil(niter / test_interval)))
output = zeros((niter, 8, 10))

for it in range(niter):
    solver.step(1)
    #net1.forward()
    
    train_loss[it] = net1.blobs['loss'].data
    
    net1.forward(start='conv2')
    output[it] = test_net.blobs['ip2'].data[:8]

    if it % test_interval == 0:
        print 'iteration', it, 'testing...'
        correct = 0
        for test_it in range(100):
            test_net.forward()
            correct += sum(test_net.blobs['ip2'].data.argmax(1) == test_net.blobs['label'].data)
        test_acc[it // test_interval] = correct / 1e4

iteration 0 testing...
iteration 5000 testing...
iteration 10000 testing...
iteration 15000 testing...
iteration 20000 testing...
CPU times: user 1min 56s, sys: 16.8 s, total: 2min 13s
Wall time: 1min 52s


In [13]:
# Save the trained (unmodified) weights as the baseline model.
net1.save('examples/mnist/baseline.caffemodel')

In [None]:
# Histograms of the weights feeding the first neuron (row 0) of ip1 and ip2,
# drawn in the top two slots of a 3-row figure.
fig = plt.figure(figsize=(15, 15))

ax_ip1 = fig.add_subplot(311)
ax_ip1.hist(feat1[0].flat, bins=500, range=(-1, 1))
ax_ip1.set_title("ip1_weight")

ax_ip2 = fig.add_subplot(312)
ax_ip2.hist(feat2[0].flat, bins=500, range=(-1, 1))
ax_ip2.set_title("ip2_weight")

In [14]:
# Per-neuron mean and standard deviation of the weights, one entry per row of
# the weight matrix.

# Layer 1 (ip1)
mean_feat1 = [feat1[j].mean() for j in range(l1)]
std_feat1 = [feat1[j].std() for j in range(l1)]

# Layer 2 (ip2)
mean_feat2 = [feat2[j].mean() for j in range(l2)]
std_feat2 = [feat2[j].std() for j in range(l2)]

In [15]:
# Per-neuron statistics split by sign: mean / std of the strictly positive
# weights (posm / poss) and of the strictly negative weights (negm / negs).
# A row with no weights of a given sign yields NaN for that entry.

# Layer 1 (ip1)
posm_feat1 = [feat1[j][feat1[j] > 0].mean() for j in range(l1)]
negm_feat1 = [feat1[j][feat1[j] < 0].mean() for j in range(l1)]
poss_feat1 = [feat1[j][feat1[j] > 0].std() for j in range(l1)]
negs_feat1 = [feat1[j][feat1[j] < 0].std() for j in range(l1)]

# Layer 2 (ip2)
posm_feat2 = [feat2[j][feat2[j] > 0].mean() for j in range(l2)]
negm_feat2 = [feat2[j][feat2[j] < 0].mean() for j in range(l2)]
poss_feat2 = [feat2[j][feat2[j] > 0].std() for j in range(l2)]
negs_feat2 = [feat2[j][feat2[j] < 0].std() for j in range(l2)]

In [18]:
# Zero pruning: for every neuron (row), set to 0 each weight w with
#     0.8 * negm <= w <= 0.8 * posm
# where posm / negm are that row's mean positive / mean negative weight.
# NOTE(review): the original header said "0.8 * std range", but the thresholds
# have always been built from the signed means, not the standard deviations.
prune_scale = 0.8

# Per-neuron band half-widths, kept under their original names:
# a1 / b1 = upper bound, a2 / b2 = magnitude of the lower bound.
a1 = [prune_scale * m for m in posm_feat1]
a2 = [-prune_scale * m for m in negm_feat1]
b1 = [prune_scale * m for m in posm_feat2]
b2 = [-prune_scale * m for m in negm_feat2]

# Vectorised in-place pruning. This replaces the element-by-element Python
# loops (and their hard-coded row length of 7200) with one boolean mask per
# layer. Bounds are float64 column vectors so each row is compared against its
# own thresholds at the same precision as the scalar comparisons were.
for weights, upper, lower in ((feat1, a1, a2), (feat2, b1, b2)):
    hi = np.asarray(upper, dtype=np.float64)[:, np.newaxis]
    lo = -np.asarray(lower, dtype=np.float64)[:, np.newaxis]
    # NaN bounds (row without positive or negative weights) compare False,
    # so such rows are left untouched, as before.
    weights[(weights >= lo) & (weights <= hi)] = 0

In [None]:
# Same two histograms as before pruning (row 0 of ip1 and of ip2), now showing
# the weight distribution after the in-place edits to feat1 / feat2.
plt.figure(figsize=(15, 15))

top_axes = plt.subplot(3, 1, 1)
top_axes.hist(feat1[0].flat, bins=500, range=(-1, 1))
top_axes.set_title("ip1_weight")

middle_axes = plt.subplot(3, 1, 2)
middle_axes.hist(feat2[0].flat, bins=500, range=(-1, 1))
middle_axes.set_title("ip2_weight")

In [None]:
# Print, one line per neuron, how many weights are exactly zero:
# all ip1 rows first, then all ip2 rows.
for weights, n_rows in ((feat1, l1), (feat2, l2)):
    for row in weights[:n_rows]:
        print((row == 0).sum())

In [19]:
# Save the net after zero pruning.
# NOTE(review): presumably requires the 'examples/mnist/zeropr/' directory to
# exist already - confirm.
net1.save('examples/mnist/zeropr/pruned.caffemodel')

In [20]:
# Recompute the signed per-neuron statistics on the current weights. The
# strict > 0 / < 0 selections leave out every weight that is exactly zero.

# Layer 1 (ip1): positive-weight mean/std, then negative-weight mean/std
posm_feat1, poss_feat1 = [], []
negm_feat1, negs_feat1 = [], []
for j in range(l1):
    row = feat1[j]
    posm_feat1.append(row[row > 0].mean())
    negm_feat1.append(row[row < 0].mean())
    poss_feat1.append(row[row > 0].std())
    negs_feat1.append(row[row < 0].std())

# Layer 2 (ip2)
posm_feat2, poss_feat2 = [], []
negm_feat2, negs_feat2 = [], []
for j in range(l2):
    row = feat2[j]
    posm_feat2.append(row[row > 0].mean())
    negm_feat2.append(row[row < 0].mean())
    poss_feat2.append(row[row > 0].std())
    negs_feat2.append(row[row < 0].std())

In [21]:
# Mean value quantization: within each neuron (row), replace every positive
# weight by the row's mean positive weight and every negative weight by the
# row's mean negative weight. Zeros stay zero.
#
# Done with boolean masks per row instead of an element-by-element Python
# loop, which also removes the hard-coded row length of 7200. Both masks are
# taken before any assignment, so each weight is classified by its original
# sign.
for weights, pos_means, neg_means in ((feat1, posm_feat1, negm_feat1),
                                      (feat2, posm_feat2, negm_feat2)):
    # iterating a 2-D array yields row views, so the assignments are in place
    for row, pos_mean, neg_mean in zip(weights, pos_means, neg_means):
        is_pos = row > 0
        is_neg = row < 0
        row[is_pos] = pos_mean
        row[is_neg] = neg_mean

In [22]:
# Save the net after mean-value quantization of ip1 / ip2.
net1.save('examples/mnist/zeropr/quantization.caffemodel')

In [None]:
# Print, one line per neuron, how many weights equal one of the three levels:
# the row's positive mean, its negative mean, or zero.
for weights, pos_means, neg_means, n_rows in ((feat1, posm_feat1, negm_feat1, l1),
                                              (feat2, posm_feat2, negm_feat2, l2)):
    for j in range(n_rows):
        row = weights[j]
        on_level = (row == pos_means[j]) | (row == neg_means[j]) | (row == 0)
        print(on_level.sum())

In [23]:
# Same absolute value for the positive and negative level: within each neuron
# (row), positive weights become +(posm - negm) / 2 and negative weights become
# (negm - posm) / 2, i.e. the average magnitude of the two signed means.
# Zeros stay zero.
#
# Done with boolean masks per row instead of an element-by-element Python
# loop, which also removes the hard-coded row length of 7200.
# NOTE(review): if a row has weights of only one sign, the other mean is NaN
# and the surviving weights of that row become NaN (unchanged from before).
for weights, pos_means, neg_means in ((feat1, posm_feat1, negm_feat1),
                                      (feat2, posm_feat2, negm_feat2)):
    # iterating a 2-D array yields row views, so the assignments are in place
    for row, pos_mean, neg_mean in zip(weights, pos_means, neg_means):
        is_pos = row > 0
        is_neg = row < 0
        row[is_pos] = (pos_mean - neg_mean) / 2
        row[is_neg] = (neg_mean - pos_mean) / 2

In [24]:
# Save the net after the symmetric (same absolute value) quantization.
net1.save('examples/mnist/zeropr/same_abs.caffemodel')

In [None]:
# One histogram per neuron for the first 20 rows of the ip1 weights, stacked
# vertically in a 21-row grid and titled with the 1-based neuron number.
fig = plt.figure(figsize=(15, 100))
for neuron in range(20):
    ax = fig.add_subplot(21, 1, neuron + 1)
    ax.hist(feat1[neuron], bins=784)
    ax.set_title(neuron + 1)

In [None]:
# Print, for every neuron, the midpoint between its positive and negative mean
# weight (as a one-element list): ip1 neurons first, then ip2 neurons.
for pos_means, neg_means, n_rows in ((posm_feat1, negm_feat1, l1),
                                     (posm_feat2, negm_feat2, l2)):
    for j in range(n_rows):
        midpoint = (pos_means[j] + neg_means[j]) / 2
        print([midpoint])

In [None]:
# Print, one line per neuron, how many weights sit on one of the symmetric
# levels: +(posm - negm) / 2, -(posm - negm) / 2, or zero.
for weights, pos_means, neg_means, n_rows in ((feat1, posm_feat1, negm_feat1, l1),
                                              (feat2, posm_feat2, negm_feat2, l2)):
    for j in range(n_rows):
        row = weights[j]
        level = (pos_means[j] - neg_means[j]) / 2
        on_level = (row == level) | (row == -level) | (row == 0)
        print(on_level.sum())

In [None]:
# Standard deviation of the ip1 blob's data for item 63 of the current
# training batch (the ip1 blob has shape (64, 500), so this is its last row).
net1.blobs['ip1'].data[63].std()