In [1]:
import caffe

import numpy as np
from pylab import *
%matplotlib inline
# Select GPU device 0 and switch Caffe to GPU mode
# (assumes a usable GPU with index 0 is present).
caffe.set_device(0)
caffe.set_mode_gpu()

# Build an SGD solver from the solver definition file 'solver3.prototxt'.
solver = caffe.SGDSolver('solver3.prototxt')

In [2]:
# Run the optimization defined by the solver created in the previous cell.
solver.solve()

In [None]:
from caffe import layers as L
from caffe import params as P

def lenet(lmdb, batch_size):
    """Describe a LeNet-style regression network with caffe.NetSpec.

    Layout: LMDB data layer -> two (5x5 convolution, 2x2 stride-2 average
    pooling) stages with 20 and 50 filters -> fully connected layers of
    width 500, 20 and 1 (ReLU after the first two) -> Euclidean loss between
    the single output and the label.

    Args:
        lmdb: source path handed to the Data layer (LMDB backend).
        batch_size: batch size of the Data layer.

    Returns:
        The result of ``NetSpec.to_proto()``; callers convert it with
        ``str()`` to obtain prototxt text.
    """
    def xavier():
        # fresh filler spec for every layer, as in a hand-written definition
        return dict(type='xavier')

    def conv5(bottom, channels):
        # 5x5 convolution producing `channels` feature maps
        return L.Convolution(bottom, kernel_size=5, num_output=channels,
                             weight_filler=xavier())

    def avg_pool2(bottom):
        # non-overlapping 2x2 average pooling
        return L.Pooling(bottom, kernel_size=2, stride=2, pool=P.Pooling.AVE)

    def dense(bottom, width):
        # fully connected layer with `width` outputs
        return L.InnerProduct(bottom, num_output=width, weight_filler=xavier())

    net = caffe.NetSpec()
    net.data, net.label = L.Data(batch_size=batch_size, source=lmdb,
                                 backend=P.Data.LMDB, ntop=2)

    # convolutional feature extractor
    net.conv1 = conv5(net.data, 20)
    net.pool1 = avg_pool2(net.conv1)
    net.conv2 = conv5(net.pool1, 50)
    net.pool2 = avg_pool2(net.conv2)

    # fully connected regression head; the ReLUs operate in place
    net.ip1 = dense(net.pool2, 500)
    net.relu1 = L.ReLU(net.ip1, in_place=True)
    net.ip2 = dense(net.relu1, 20)
    net.relu2 = L.ReLU(net.ip2, in_place=True)
    net.ip3 = dense(net.relu2, 1)

    net.loss = L.EuclideanLoss(net.ip3, net.label)
    return net.to_proto()

In [None]:
# Write the train and test net definitions; the two differ only in the LMDB
# source and the batch size passed to lenet().
for proto_path, lmdb_name, batch in [('train.prototxt', 'dick1', 64),
                                     ('test.prototxt', 'dick2', 100)]:
    with open(proto_path, 'w') as f:
        f.write(str(lenet(lmdb_name, batch)))

In [None]:
# Print the contents of the solver definition file (shell command).
!cat solver.prototxt

In [None]:
# Select GPU device 0 and switch Caffe to GPU mode.
caffe.set_device(0)
caffe.set_mode_gpu()

# Rebind `solver` to a new SGD solver built from 'solver.prototxt' (this
# replaces the solver created from 'solver3.prototxt' in the first cell),
# then run its optimization.
solver = caffe.SGDSolver('solver.prototxt')
solver.solve()

In [None]:
# List every blob of the train net together with the shape of its data array;
# each output is (batch size, feature dim, spatial dim)
[(blob_name, blob.data.shape) for blob_name, blob in solver.net.blobs.items()]

In [None]:
# List every parameterised layer of the train net with the shape of its
# first parameter array: just the weight sizes (not biases)
[(layer_name, layer_params[0].data.shape)
 for layer_name, layer_params in solver.net.params.items()]

In [None]:
# Run one forward pass on the train net and on the first test net.
print solver.net.forward()  # train net
solver.test_nets[0].forward()  # test net (there can be more than one)

In [None]:
print solver.net.blobs['data'].data[:10, 0].shape

# we use a little trick to tile the first eight images
imshow(solver.net.blobs['data'].data[:5, 0].transpose(1, 0, 2).reshape(28, 5*28), cmap='gray')
print solver.net.blobs['label'].data[:10, 0]
print solver.net.blobs['ip3'].data[:10, 0]


In [None]:
imshow(solver.test_nets[0].blobs['data'].data[:1, 0].transpose(1, 0, 2).reshape(28, 28), cmap='gray')
print solver.test_nets[0].blobs['label'].data - solver.test_nets[0].blobs['ip3'].data
print solver.net.params['ip3'][0].diff

In [None]:
# Advance the solver by one iteration, then print the train net's loss blob.
solver.step(1)
print solver.net.blobs['loss'].data

In [None]:
# Run a forward pass on the first test net and print its 'ip3' blob.
solver.test_nets[0].forward()
print solver.test_nets[0].blobs['ip3'].data

In [None]:
# Visualize the diff array of the conv1 weights for input channel 0, tiled
# as a grid of 4 rows x 5 columns of 5x5 patches.
# The reshape needs exactly 20 filters with 5x5 kernels, which matches conv1
# as defined in lenet() -- presumably the net the solver loads; confirm if
# the net definition changes.
imshow(solver.net.params['conv1'][0].diff[:, 0].reshape(4, 5, 5, 5)
       .transpose(0, 2, 1, 3).reshape(4*5, 5*5), cmap='gray')

In [None]:
# Plot the per-iteration training loss (left axis) together with the periodic
# test metric (right axis, red).
# NOTE: this cell reads niter, test_interval, train_loss and test_acc, which
# are produced by the training-loop cell -- run that cell first.
_, ax1 = subplots()
ax2 = ax1.twinx()
ax1.plot(arange(niter), train_loss)
ax2.plot(test_interval * arange(len(test_acc)), test_acc, 'r')
ax1.set_xlabel('iteration')
ax1.set_ylabel('train loss')
# test_acc holds an L2 prediction error (lower is better), not an accuracy,
# so label the axis accordingly
ax2.set_ylabel('test error')

In [None]:
# Run the solver's optimization, then save the train net to the file
# "model_big" (note: the file name carries no extension).
solver.solve()
solver.net.save("model_big")

In [None]:
%%time
niter = 200
test_interval = 25
# losses will also be stored in the log
train_loss = zeros(niter)
test_acc = zeros(int(np.ceil(niter / test_interval)))

# the main solver loop
for it in range(niter):
    solver.step(1)  # SGD by Caffe
    
    # store the train loss
    train_loss[it] = solver.net.blobs['loss'].data
    
    # store the output on the first test batch
    # (start the forward pass at conv1 to avoid loading new data)
    solver.test_nets[0].forward(start='conv1')
    
    # run a full test every so often
    # (Caffe can also do this for us and write to a log, but we show here
    #  how to do it directly in Python, where more complicated things are easier.)
    if it % test_interval == 0:
        print 'Iteration', it, 'testing...'
        correct = 0
        for test_it in range(100):
            solver.test_nets[0].forward()
            correct += np.linalg.norm((solver.test_nets[0].blobs['ip3'].data - solver.test_nets[0].blobs['label'].data))
        test_acc[it // test_interval] = correct / 1e4