This repository has been archived by the owner on Aug 30, 2018. It is now read-only.

Commit

Create library for converting pytorch to caffe2 and examples (#69)
* Add helper functions for pytorch->caffe2 conversion, and benchmark.
Add an example python file to demonstrate how to convert PyTorch
models to Caffe2.

* Update the example

* Merge name_inputs() into c2_native_run_net

* Fix the pad order problem (see the pad-reorder sketch below)
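
For context on the pad-order fix: ONNX's Pad attribute for an NCHW tensor is laid out as [N_begin, C_begin, H_begin, W_begin, N_end, C_end, H_end, W_end], while Caffe2's 2-D padding expects only the spatial values. A minimal sketch of the reordering applied in backend.py below (the concrete numbers are invented for illustration):

# Hypothetical ONNX pad attribute for an NCHW tensor:
# [N_beg, C_beg, H_beg, W_beg, N_end, C_end, H_end, W_end]
onnx_pads = [0, 0, 1, 2, 0, 0, 3, 4]

# backend.py first checks that the N and C pads are all zero ...
assert set(onnx_pads[:2] + onnx_pads[4:6]) == {0}

# ... then keeps the spatial pads in Caffe2's order: [H_beg, W_beg, H_end, W_end].
caffe2_pads = onnx_pads[2:4] + onnx_pads[6:8]
assert caffe2_pads == [1, 2, 3, 4]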
houseroad committed Nov 27, 2017
1 parent a3ec934 commit db94c12
Showing 3 changed files with 172 additions and 8 deletions.
100 changes: 100 additions & 0 deletions examples/pytorch_to_caffe2.py
@@ -0,0 +1,100 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.proto import caffe2_pb2
from caffe2.python import core
from torch.autograd import Variable
from onnx_caffe2.backend import Caffe2Backend
from onnx_caffe2.helper import c2_native_run_net, save_caffe2_net, load_caffe2_net, \
    benchmark_caffe2_model, benchmark_pytorch_model

import io
import logging
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import onnx


log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


class MNIST(nn.Module):
    """The standard small MNIST convnet: two conv layers, then two fully connected layers."""

    def __init__(self):
        super(MNIST, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)  # flatten to (batch, 20 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x)

# Create a PyTorch model.
log.info("Create a PyTorch model.")
pytorch_model = MNIST()
pytorch_model.train(False)

# Make the inputs in tuple format.
inputs = (Variable(torch.randn(3, 1, 28, 28), requires_grad=True), )

# Export an ONNX model.
log.info("Export an ONNX model from the PyTorch model.")
f = io.BytesIO()
torch.onnx.export(pytorch_model, inputs, f, verbose=True)
onnx_model = onnx.ModelProto.FromString(f.getvalue())

# Check whether the onnx_model is valid or not.
log.info("Check the ONNX model.")
onnx.checker.check_model(onnx_model)

# Convert the ONNX model to a Caffe2 model.
log.info("Convert the model to a Caffe2 model.")
init_net, predict_net = Caffe2Backend.onnx_graph_to_caffe2_net(onnx_model.graph, device="CPU")

# Caffe2 model takes a numpy array list as input.
caffe2_inputs = [var.data.numpy() for var in inputs]

# Save and load the converted Caffe2 model in the protobuf files.
log.info("Save the Caffe2 models as pb files.")
init_file = "./mymodel_init.pb"
predict_file = "./mymodel_predict.pb"
save_caffe2_net(init_net, init_file, output_txt=False)
save_caffe2_net(predict_net, predict_file, output_txt=True)
log.info("Load the Caffe2 models back.")
init_net = load_caffe2_net(init_file)
predict_net = load_caffe2_net(predict_file)

# Compute the results using the PyTorch model.
log.info("Run the PyTorch model.")
pytorch_results = pytorch_model(*inputs)

# Compute the results using the Caffe2 model.
log.info("Run the Caffe2 model.")
_, caffe2_results = c2_native_run_net(init_net, predict_net, caffe2_inputs)

# Check the decimal precision of the exported Caffe2 model.
expected_decimal = 5
for p, c in zip([pytorch_results], caffe2_results):
    np.testing.assert_almost_equal(p.data.cpu().numpy(), c, decimal=expected_decimal)
log.info("The exported model achieves {}-decimal precision.".format(expected_decimal))

pytorch_time = benchmark_pytorch_model(pytorch_model, inputs)
caffe2_time = benchmark_caffe2_model(init_net, predict_net)

print("PyTorch model's execution time is {} milliseconds/ iteration, {} iterations per second.".format(
pytorch_time, 1000 / pytorch_time))
print("Caffe2 model's execution time is {} milliseconds / iteration, {} iterations per second".format(
caffe2_time, 1000 / caffe2_time))
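
A usage note beyond the example itself: once the two .pb files are on disk, inference needs neither PyTorch nor ONNX. A minimal sketch, assuming the stock caffe2.python workspace.Predictor API (not part of this commit; the file names match the example above):

import numpy as np
from caffe2.python import workspace

# Read the serialized NetDefs written by save_caffe2_net above.
with open("./mymodel_init.pb", "rb") as f:
    init_net_str = f.read()
with open("./mymodel_predict.pb", "rb") as f:
    predict_net_str = f.read()

# Predictor consumes the raw protobuf strings; run() takes a list of input arrays.
predictor = workspace.Predictor(init_net_str, predict_net_str)
results = predictor.run([np.random.randn(3, 1, 28, 28).astype(np.float32)])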
4 changes: 2 additions & 2 deletions onnx_caffe2/backend.py
@@ -260,7 +260,6 @@ def _create_pad(cls, n):
                 set(pads[:2] + pads[4:6]) == {0}):
             raise ValueError('Caffe2 only supports padding 2D Tensor, whereas padding is ' + str(pads))
         pads[:] = pads[2:4] + pads[6:8]
-
         return cls._common_onnx_node_to_caffe2_op(n)
 
     @classmethod
@@ -630,7 +629,8 @@ def _all_names_in_graph(graph):
         return names
 
     @classmethod
-    def onnx_graph_to_caffe2_net(cls, graph_def):
+    def onnx_graph_to_caffe2_net(cls, graph_def, device="CPU"):
+        device_option = get_device_option(Device(device))
         cls._inplace_rewrite(graph_def)
         if graph_def.initializer:
             init_net = cls.onnx_initializer_to_caffe2_init_net(
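A sketch of what the new device parameter means for callers (the "CUDA:0" string assumes the onnx.backend.base.Device convention; only the CPU path is exercised by the example above):

# Default CPU conversion, as in examples/pytorch_to_caffe2.py:
init_net, predict_net = Caffe2Backend.onnx_graph_to_caffe2_net(onnx_model.graph)

# Hypothetical GPU conversion through the same entry point:
init_net, predict_net = Caffe2Backend.onnx_graph_to_caffe2_net(
    onnx_model.graph, device="CUDA:0")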
76 changes: 70 additions & 6 deletions onnx_caffe2/helper.py
@@ -3,11 +3,19 @@
 from __future__ import print_function
 from __future__ import unicode_literals
 
+from caffe2.proto import caffe2_pb2
 from onnx import helper
 from onnx.backend.base import namedtupledict
 
 from onnx_caffe2.workspace import Workspace
 
+import io
+import logging
+import time
+
+
+log = logging.getLogger(__name__)
+
 
 class _DummyNameFactory(object):
     used_names = set()
@@ -40,11 +48,11 @@ def c2_native_run_op(op_def, inputs):
     ws = Workspace()
     if isinstance(inputs, dict):
         for key, value in inputs.items():
-            ws.FeedBlob(key, value)
+            ws.FeedBlob(key, value, op_def.device_option)
     else:
         assert(len(op_def.input) == len(inputs))
         for key, value in zip(op_def.input, inputs):
-            ws.FeedBlob(key, value)
+            ws.FeedBlob(key, value, op_def.device_option)
 
     ws.RunOperatorOnce(op_def)
 
@@ -60,17 +68,73 @@ def c2_native_run_net(init_net, predict_net, inputs):
 
     if isinstance(inputs, dict):
         for key, value in inputs.items():
-            ws.FeedBlob(key, value)
+            ws.FeedBlob(key, value, predict_net.device_option)
     else:
         uninitialized = [input_name
                          for input_name in predict_net.external_input
                          if not ws.HasBlob(input_name)]
-        assert len(uninitialized) == len(inputs)
-        for key, value in zip(uninitialized, inputs):
-            ws.FeedBlob(key, value)
+        if len(uninitialized) == len(inputs):
+            for key, value in zip(uninitialized, inputs):
+                ws.FeedBlob(key, value, predict_net.device_option)
+        else:
+            # If everything is already initialized,
+            # just feed the first len(inputs) external inputs.
+            assert(len(inputs) <= len(predict_net.external_input))
+            for i in range(len(inputs)):
+                ws.FeedBlob(predict_net.external_input[i], inputs[i],
+                            predict_net.device_option)
 
     ws.RunNetOnce(predict_net)
 
     output_names = predict_net.external_output
     output_values = [ws.FetchBlob(name) for name in output_names]
     return ws, namedtupledict('Outputs', output_names)(*output_values)
+
+
+def load_caffe2_net(file):
+    net = caffe2_pb2.NetDef()
+    with open(file, "rb") as f:
+        net.ParseFromString(f.read())
+    return net
+
+
+def save_caffe2_net(net, file, output_txt=False):
+    with open(file, "wb") as f:
+        f.write(net.SerializeToString())
+    if output_txt:
+        with open(file + "txt", "w") as f:
+            f.write(str(net))
+
+
+def benchmark_caffe2_model(init_net, predict_net, warmup_iters=3, main_iters=10, layer_details=True):
+    '''
+    Run the benchmark net on the target model.
+    Return the execution time per iteration (milliseconds).
+    '''
+    ws = Workspace()
+    if init_net:
+        ws.RunNetOnce(init_net)
+    ws.CreateNet(predict_net)
+    results = ws.BenchmarkNet(predict_net.name, warmup_iters, main_iters, layer_details)
+    del ws
+    return results[0]
+
+
+def benchmark_pytorch_model(model, inputs, training=False, warmup_iters=3,
+                            main_iters=10, verbose=False):
+    '''
+    Run the model several times, and measure the execution time.
+    Return the execution time per iteration (milliseconds).
+    '''
+    for _i in range(warmup_iters):
+        model(*inputs)
+    total_pytorch_time = 0.0
+    for _i in range(main_iters):
+        ts = time.time()
+        model(*inputs)
+        te = time.time()
+        total_pytorch_time += te - ts
+    log.info("The PyTorch model execution time per iter is {} milliseconds, "
+             "{} iters per second.".format(total_pytorch_time / main_iters * 1000,
+                                           main_iters / total_pytorch_time))
+    return total_pytorch_time * 1000 / main_iters
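
A short usage sketch of the two benchmark helpers defined above, reusing the nets and model from the example file (this assumes BenchmarkNet's first return value is the overall milliseconds per iteration, which is what results[0] relies on):

# layer_details=False skips the per-operator timing breakdown.
caffe2_ms = benchmark_caffe2_model(init_net, predict_net,
                                   warmup_iters=3, main_iters=10,
                                   layer_details=False)
pytorch_ms = benchmark_pytorch_model(pytorch_model, inputs,
                                     warmup_iters=3, main_iters=10)
print("Caffe2: {:.3f} ms/iter; PyTorch: {:.3f} ms/iter".format(caffe2_ms, pytorch_ms))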
