# High-level Caffe2 Example

In [1]:
import os
import sys
import caffe2
import numpy as np
from caffe2.python import core, model_helper, net_drawer, workspace, visualize, brew, optimizer, utils
from caffe2.proto import caffe2_pb2
from common.params import *
from common.utils import *

In [2]:
# Report the runtime environment so the timings recorded below have context.
for env_label, env_value in (
    ("OS: ", sys.platform),
    ("Python: ", sys.version),
    ("Numpy: ", np.__version__),
):
    print(env_label, env_value)
# The GPU query is a helper call, so it is made only after the lines above are printed.
print("GPU: ", get_gpu_name())

OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
Numpy:  1.13.1
GPU:  ['Tesla K80', 'Tesla K80']


In [3]:
# Choose the device type from the GPU flag, then build the option for device index 0.
device_type = caffe2_pb2.CUDA if GPU else caffe2_pb2.CPU  # CUDA -> run on GPU, CPU -> run on CPU
device_opts = core.DeviceOption(device_type, 0)

In [4]:
def create_model(m, device_opts):
    """Append the convolutional classifier's operators to the model helper `m`.

    The layers are added in this order: two stages of
    conv -> relu -> conv -> max-pool -> relu -> dropout, then a 512-unit fully
    connected layer with relu and dropout, then a final fully connected layer
    of width N_CLASSES followed by softmax.

    Args:
        m: model helper passed to every `brew` call; reads the 'data' blob.
        device_opts: device option under which all operators are created.

    Returns:
        The blob returned by `brew.softmax` (created with the name 'softmax').
    """
    with core.DeviceScope(device_opts):
        # Stage 1: 3 -> 50 -> 50 channels, 2x2 max-pool (stride 2), 25% dropout.
        blob = brew.conv(m, 'data', 'conv1', dim_in=3, dim_out=50, kernel=3, pad=1, no_gradient_to_input=1)
        blob = brew.relu(m, blob, 'relu1')
        blob = brew.conv(m, blob, 'conv2', dim_in=50, dim_out=50, kernel=3, pad=1)
        blob = brew.max_pool(m, blob, 'pool1', kernel=2, stride=2)
        blob = brew.relu(m, blob, 'relu2')
        blob = brew.dropout(m, blob, 'drop1', ratio=0.25)

        # Stage 2: 50 -> 100 -> 100 channels, 2x2 max-pool (stride 2), 25% dropout.
        blob = brew.conv(m, blob, 'conv3', dim_in=50, dim_out=100, kernel=3, pad=1)
        blob = brew.relu(m, blob, 'relu3')
        blob = brew.conv(m, blob, 'conv4', dim_in=100, dim_out=100, kernel=3, pad=1)
        blob = brew.max_pool(m, blob, 'pool2', kernel=2, stride=2)
        blob = brew.relu(m, blob, 'relu4')
        blob = brew.dropout(m, blob, 'drop2', ratio=0.25)

        # Classifier head. dim_in is fixed at 100 * 8 * 8, which presumably
        # corresponds to 32x32 inputs halved by the two pools -- confirm if the
        # input size changes.
        blob = brew.fc(m, blob, 'fc1', dim_in=100 * 8 * 8, dim_out=512)
        blob = brew.relu(m, blob, 'relu5')
        blob = brew.dropout(m, blob, 'drop3', ratio=0.5)

        blob = brew.fc(m, blob, 'fc2', dim_in=512, dim_out=N_CLASSES)
        return brew.softmax(m, blob, 'softmax')

In [5]:
def add_training_operators(softmax, m, device_opts):
    """Add the loss, gradient and SGD update operators to the model helper `m`.

    Args:
        softmax: blob holding the network's softmax output.
        m: model helper that receives the training operators; reads the
            "label" blob and writes the "xent" and "loss" blobs.
        device_opts: device option under which the operators are created.

    Returns:
        None. The operators are added to `m` as a side effect.
    """
    with core.DeviceScope(device_opts):
        # Cross entropy of the softmax output against "label", reduced by
        # AveragedLoss into the "loss" blob that the training loop reports.
        xent = m.LabelCrossEntropy([softmax, "label"], 'xent')
        loss = m.AveragedLoss(xent, "loss")
        m.AddGradientOperators([loss])
        # The value returned by build_sgd is not used afterwards, so it is not bound.
        optimizer.build_sgd(
            m,
            base_learning_rate=LR,
            policy='fixed',
            momentum=MOMENTUM)

In [6]:
def init_model():
    """Build the training net and prepare the workspace to run it.

    Reads the notebook-level names `x_train`, `y_train`, `BATCHSIZE` and
    `device_opts`, so the data-loading cell must have run before this is called.

    Returns:
        The model helper named "train_net", after its parameter-init net has
        been run once and its main net has been created in the workspace.
    """
    # Feed one batch so that the "data" and "label" blobs exist in the workspace.
    workspace.FeedBlob("data", x_train[:BATCHSIZE], device_option=device_opts)
    workspace.FeedBlob("label", y_train[:BATCHSIZE], device_option=device_opts)

    # Model helper whose arg scope carries the layout / cuDNN settings.
    helper = model_helper.ModelHelper(
        name="train_net",
        arg_scope={
            'order': 'NCHW',
            'use_cudnn': True,
            'cudnn_exhaustive_search': True,
            'ws_nbytes_limit': (64 * 1024 * 1024),
        },
    )
    # Forward pass first, then the loss / gradient / optimizer operators on its output.
    add_training_operators(
        create_model(helper, device_opts=device_opts),
        helper,
        device_opts=device_opts,
    )

    # Run the parameter initialisation once, then register the training net.
    workspace.RunNetOnce(helper.param_init_net)
    workspace.CreateNet(helper.net)
    return helper

In [7]:
%%time
# Load the train/test arrays in channel-first layout, then report each one's shape and dtype.
x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)
cifar_splits = (x_train, x_test, y_train, y_test)
print(*[split.shape for split in cifar_splits])
print(*[split.dtype for split in cifar_splits])

Downloading http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Done.
Extracting files...
Done.
Preparing train set...
Preparing test set...
Done.
(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)
float32 float32 int32 int32
CPU times: user 2.98 s, sys: 1.48 s, total: 4.46 s
Wall time: 38.2 s


In [8]:
%%time
# Initialise model: init_model() builds the training net and sets up the workspace;
# the returned model helper is kept in `model` so the training loop can run `model.net`.
model = init_model()

CPU times: user 500 ms, sys: 552 ms, total: 1.05 s
Wall time: 1.14 s


In [9]:
%%time
# Train model: EPOCHS passes over the training set, one net run per mini-batch.
for epoch in range(EPOCHS):
    for batch_x, batch_y in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):
        # Overwrite the input blobs with this mini-batch, then run the training net once.
        workspace.FeedBlob("data", batch_x, device_option=device_opts)
        workspace.FeedBlob("label", batch_y, device_option=device_opts)
        workspace.RunNet(model.net)
    print("Finished epoch: ", epoch)
    # The "loss" blob is fetched after the epoch's final mini-batch has been run.
    latest_loss = workspace.FetchBlob("loss")
    print(str(epoch) + ': ' + str(latest_loss))

Finished epoch:  0
0: 1.242152214050293
Finished epoch:  1
1: 1.1519280672073364
Finished epoch:  2
2: 0.9268437027931213
Finished epoch:  3
3: 0.7174981832504272
Finished epoch:  4
4: 0.6170254349708557
Finished epoch:  5
5: 0.5968530774116516
Finished epoch:  6
6: 0.789893627166748
Finished epoch:  7
7: 0.5958438515663147
Finished epoch:  8
8: 0.8032889366149902
Finished epoch:  9
9: 0.53541100025177
CPU times: user 2min 2s, sys: 27 s, total: 2min 29s
Wall time: 2min 29s


In [10]:
%%time
# Init test model: same layers as the training net, with 'is_test' added to the arg scope.
test_arg_scope = {
    'order': 'NCHW',
    'use_cudnn': True,
    'cudnn_exhaustive_search': True,
    'ws_nbytes_limit': (64 * 1024 * 1024),
    'is_test': True,
}
# NOTE(review): init_params=False presumably makes the test net reuse the weights
# already in the workspace from training instead of re-initialising them -- confirm.
test_model = model_helper.ModelHelper(name="test_net", init_params=False, arg_scope=test_arg_scope)
create_model(test_model, device_opts=device_opts)
workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True)

# Run test
# y_guess is sized to a whole number of mini-batches, and y_truth is cut to match.
n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE
# Builtin `int` is the dtype the `np.int` alias referred to; the alias was
# deprecated in NumPy 1.20 and removed in 1.24.
y_guess = np.zeros(n_samples, dtype=int)
y_truth = y_test[:n_samples]
for batch_idx, (data, _label) in enumerate(yield_mb(x_test, y_test, BATCHSIZE)):
    workspace.FeedBlob("data", data, device_option=device_opts)
    workspace.RunNet(test_model.net)
    # Predicted class = index of the largest "softmax" entry along the last axis.
    batch_start = batch_idx * BATCHSIZE
    y_guess[batch_start:batch_start + BATCHSIZE] = np.argmax(workspace.FetchBlob("softmax"), axis=-1)

CPU times: user 860 ms, sys: 168 ms, total: 1.03 s
Wall time: 1.15 s


In [11]:
# Fraction of predictions equal to the ground truth. np.mean reduces the boolean
# comparison inside NumPy rather than iterating it element by element with builtin sum().
print("Accuracy: ", np.mean(y_guess == y_truth))

Accuracy:  0.790865384615
