In [1]:
import io
import numpy as np
import torch
import torch.onnx
import torchvision.models as models
from torch import nn
from torch.autograd import Variable

In [2]:
print("OS: ", sys.platform)
print("Python: ", sys.version)
print("Numpy: ", np.__version__)
print("Pytorch: ", torch.__version__)

OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
Numpy:  1.13.1
Pytorch:  0.4.0a0+0fd9682


In [3]:
!cat /proc/cpuinfo | grep processor | wc -l

12


In [4]:
!nvidia-smi --query-gpu=gpu_name --format=csv

name
Tesla K80
Tesla K80


In [5]:
BATCH_SIZE = 32
RESNET_FEATURES = 2048
BATCHES_GPU = 40
BATCHES_CPU = 8

In [6]:
def give_fake_data(batches):
    """ Create an array of fake data to run inference on"""
    np.random.seed(0)
    dta = np.random.rand(BATCH_SIZE*batches, 224, 224, 3).astype(np.float32)
    return dta, np.swapaxes(dta, 1, 3)

In [7]:
def yield_mb(X, batchsize):
    """ Function yield (complete) mini_batches of data"""
    for i in range(len(X)//batchsize):
        yield i, X[i*batchsize:(i+1)*batchsize]

In [8]:
# Create batches of fake data
fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCHES_CPU)
print(fake_input_data_cl.shape, fake_input_data_cf.shape)

(256, 224, 224, 3) (256, 3, 224, 224)


In [9]:
resnet50 = models.resnet50(pretrained=True)
chopped_resnet50 = torch.nn.Sequential(*list(resnet50.children())[:-1])

In [10]:
#chopped_resnet50.cuda()
#print("")

In [11]:
chopped_resnet50.train(False)
print("")




In [12]:
def predict_fn(classifier, data, batchsize):
    """ Return features from classifier """
    classifier.eval()
    out = np.zeros((len(data), RESNET_FEATURES), np.float32)
    for idx, dta in yield_mb(data, batchsize):
        pred = classifier(Variable(torch.FloatTensor(dta)))
        out[idx*batchsize:(idx+1)*batchsize] = pred.data.numpy().squeeze()
    return out

In [13]:
%%time
# GPU: 10.6
features = predict_fn(chopped_resnet50, fake_input_data_cf, BATCH_SIZE)

CPU times: user 8min 2s, sys: 26.4 s, total: 8min 28s
Wall time: 44.6 s


In [14]:
# Input to the model
x = Variable(torch.randn(BATCH_SIZE, 3, 224, 224), requires_grad=True)

# Export the model
torch_out = torch.onnx._export(chopped_resnet50,        # model being run
                               x,                       # model input (or a tuple for multiple inputs)
                               "resnet50_feat.onnx",    # where to save the model (can be a file or file-like object)
                               export_params=True)      # store the trained parameter weights inside the model file

In [15]:
import onnx
import onnx_caffe2.backend

In [16]:
# Load the ONNX ModelProto object. model is a standard Python protobuf object
model = onnx.load("resnet50_feat.onnx")

In [17]:
prepared_backend = onnx_caffe2.backend.prepare(model)

In [18]:
W = {model.graph.input[0].name: fake_input_data_cf}

In [19]:
%%time
c2_out = prepared_backend.run(W)[0]

CPU times: user 3min 46s, sys: 13.2 s, total: 3min 59s
Wall time: 28.3 s


In [22]:
c2_out = c2_out.squeeze()

In [23]:
print(c2_out.shape)
print(features.shape)

(256, 2048)
(256, 2048)


In [33]:
c2_out[0]

array([ 0.35658345,  0.1294719 ,  0.00147031, ...,  0.01680854,
        0.1336776 ,  0.18246289], dtype=float32)

In [35]:
features[0]

array([ 0.35658327,  0.12947206,  0.0014703 , ...,  0.01680852,
        0.13367748,  0.18246318], dtype=float32)

In [32]:
np.testing.assert_almost_equal(features, c2_out, decimal=3)