[caffe2] Shape inference for UnPackRecords
Summary:
Since UnPackRecords is part of the graph, we need to add shape inference for it to make it work end-to-end with tvm_jit_op. Because the input is packed, shape inference is impossible without shape info for the packed tensors. For context, each packed tensor has shape 1 x num_embeddings x embedding_size, where 1 is the batch size. The corresponding output tensor thus has shape batch_size x num_embeddings x embedding_size after the packed tensors are concatenated along the batch axis. Therefore, two more gflags need to be added:

- caffe2_predictor_num_embeddings
- caffe2_predictor_embedding_size

These gflags are then added to the UnPackRecordsOp in the predict_net as args, passing the info through to c2_frontend so that TVM can do its own shape inference.
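
The diff below only covers the shape-inference side; the code that actually attaches the flag values to the op is not part of this change set. As a rough sketch of the idea (not code from this commit), annotating the predict_net could look like the following, where the helper name annotateUnPackRecords and the argument names "num_embeddings" / "embedding_size" are assumptions, and the gflags are assumed to be int32:

// Illustrative sketch only -- not the code from this commit. The helper name
// and the argument names are assumptions; the real transform may differ.
#include "caffe2/proto/caffe2_pb.h"

#include <c10/util/Flags.h>

// Assumed to be int32 gflags, as named in the summary above.
C10_DECLARE_int32(caffe2_predictor_num_embeddings);
C10_DECLARE_int32(caffe2_predictor_embedding_size);

namespace caffe2 {

// Walk the predict_net and annotate every UnPackRecords op with the embedding
// dimensions so that downstream shape inference (c2_frontend / TVM) can derive
// batch_size x num_embeddings x embedding_size output shapes.
void annotateUnPackRecords(NetDef* predict_net) {
  for (auto& op : *predict_net->mutable_op()) {
    if (op.type() != "UnPackRecords") {
      continue;
    }
    auto* num_embeddings = op.add_arg();
    num_embeddings->set_name("num_embeddings");
    num_embeddings->set_i(FLAGS_caffe2_predictor_num_embeddings);

    auto* embedding_size = op.add_arg();
    embedding_size->set_name("embedding_size");
    embedding_size->set_i(FLAGS_caffe2_predictor_embedding_size);
  }
}

} // namespace caffe2

With purely illustrative numbers, caffe2_predictor_num_embeddings = 100, caffe2_predictor_embedding_size = 64, and a max batch size of 32, each packed tensor would be 1 x 100 x 64 and each unpacked output would be inferred as 32 x 100 x 64.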

Reviewed By: yinghai

Differential Revision: D21286983

fbshipit-source-id: e9a19cb6b564905282a771df2b9d211d5d37dd71
Hao Lu authored and facebook-github-bot committed May 4, 2020
1 parent bd9617d commit e26631b
Showing 4 changed files with 83 additions and 24 deletions.
57 changes: 53 additions & 4 deletions caffe2/opt/bound_shape_inferencer.cc
@@ -157,6 +157,8 @@ void BoundShapeInferencer::InferOps(
op.type() == "HalfToFused4BitRowwiseQuantized" ||
op.type() == "FloatToHalf" || op.type() == "FbGemmPack") {
InferQuantizationTransformation(op);
} else if (op.type() == "UnPackRecords") {
InferUnPackRecords(op);
} else {
InferCommonOp(op);
}
@@ -707,6 +709,54 @@ void BoundShapeInferencer::InferQuantizationTransformation(
current_dim_type_ = previous_dim_type;
}

void BoundShapeInferencer::InferUnPackRecords(const OperatorDef& op) {
  std::vector<TensorShape> input_shapes;
  for (const auto& input : op.input()) {
    const auto it = shape_info_.find(input);
    if (it == shape_info_.end()) {
      LOG(WARNING) << "Cannot find shape info for " << input << ". Skipping "
                   << op.type();
      return;
    }
    input_shapes.emplace_back(it->second.shape);
  }

  std::vector<TensorShape> output_shapes;

  ArgumentHelper helper(op);
  std::vector<std::string> fields =
      helper.GetRepeatedArgument<std::string>("fields");

  const int num_tensors = fields.size();
  if (spec_.max_batch_size == 1 && num_tensors == 1 &&
      input_shapes[0].dims_size() != 1) {
    // Special case of single tensor input
    output_shapes.push_back(input_shapes[0]);
  } else {
    // Input is packed
    TensorShape oshape;
    oshape.add_dims(spec_.max_batch_size);
    oshape.add_dims(spec_.num_embeddings);
    oshape.add_dims(spec_.embedding_length);
    // TODO: how to do this more intelligently
    oshape.set_data_type(TensorProto::FLOAT);
    for (int i = 0; i < num_tensors; i++) {
      output_shapes.push_back(oshape);
    }
  }

  for (int i = 0; i < output_shapes.size(); i++) {
    const auto& shape = output_shapes[i];

    CheckAndSetTensorBoundShape(
        op.output(i),
        setDimTypeWithFirst(current_dim_type_, shape.dims().size()),
        ConvertToVec(shape.dims()),
        output_shapes[i].data_type(),
        false);
  }
}

void BoundShapeInferencer::InferCommonOp(const OperatorDef& op) {
// First, we need to check that all the input shape/types are already
// presented
@@ -726,7 +776,6 @@ void BoundShapeInferencer::InferCommonOp(const OperatorDef& op) {
CAFFE_ENFORCE(schema);
std::vector<TensorShape> output_shapes;
output_shapes = schema->InferTensor(op, input_shapes);
int i = 0;
bool is_quantized =
!(op.type().compare(0, 4, "Int8")) && (op.type() != "Int8Dequantize");
TensorProto::DataType infered_data_type = TensorProto::UNDEFINED;
@@ -754,16 +803,16 @@ void BoundShapeInferencer::InferCommonOp(const OperatorDef& op) {
infered_data_type = TensorProto::FLOAT;
}

for (const auto& shape : output_shapes) {
for (int i = 0; i < output_shapes.size(); i++) {
const auto& shape = output_shapes[i];
if (infered_data_type == TensorProto::UNDEFINED) {
infered_data_type = shape.data_type();
}
if (shape.unknown_shape()) {
++i;
continue;
}
CheckAndSetTensorBoundShape(
op.output(i++),
op.output(i),
setDimTypeWithFirst(current_dim_type_, shape.dims().size()),
ConvertToVec(shape.dims()),
infered_data_type,
14 changes: 13 additions & 1 deletion caffe2/opt/bound_shape_inferencer.h
@@ -17,9 +17,20 @@ namespace caffe2 {
// max_seq_size.
struct CAFFE2_API BoundShapeSpec {
explicit BoundShapeSpec(int64_t b, int64_t q)
: max_batch_size(b), max_seq_size(q) {}
: max_batch_size(b),
max_seq_size(q),
num_embeddings(0),
embedding_length(0) {}
explicit BoundShapeSpec(int64_t b, int64_t q, int64_t n, int64_t e)
: max_batch_size(b),
max_seq_size(q),
num_embeddings(n),
embedding_length(e) {}
int64_t max_batch_size;
int64_t max_seq_size;
// The following two parameters are for shape inference of UnPackRecords
int64_t num_embeddings;
int64_t embedding_length;
};

/// \class A class that does bound shape inference given a C2 net. Depending on
@@ -118,6 +129,7 @@ class CAFFE2_API BoundShapeInferencer : public BoundShapeInferencerBase {
void InferReshape(const OperatorDef& op);
void InferLengthsRangeFill(const OperatorDef& op);
void InferQuantizationTransformation(const OperatorDef& op);
void InferUnPackRecords(const OperatorDef& op);

// Standard shape/type inference using op schema registered shape inference
// function
24 changes: 10 additions & 14 deletions caffe2/opt/tvm_transformer.cc
@@ -1,16 +1,6 @@
#include "caffe2/opt/tvm_transformer.h"
#include "caffe2/opt/backend_cutting.h"

C10_DEFINE_bool(
caffe2_tvm_profiling_based_jit,
false,
"Use profiling based jit for TVM transform");

C10_DEFINE_int32(
caffe2_tvm_min_ops,
8,
"Minimal number of supported ops for the subgraph to be lowered to TVM");

namespace caffe2 {

NetDef TvmTransformer::buildTvmOp(
@@ -298,15 +288,21 @@ void tvmTransform(
const std::vector<std::string>& weight_names,
const ShapeInfoMap& shape_hints,
const std::unordered_set<int>& blacklisted_ops,
size_t max_batch_size,
size_t max_seq_size,
int32_t max_batch_size,
int32_t max_seq_size,
int32_t num_embeddings,
int32_t embedding_size,
int32_t tvm_min_ops,
bool tvm_profiling_based_jit,
bool debug) {
TvmTransformOptions opts;
opts.bound_shape_spec.max_batch_size = max_batch_size;
opts.bound_shape_spec.max_seq_size = max_seq_size;
opts.bound_shape_spec.num_embeddings = num_embeddings;
opts.bound_shape_spec.embedding_length = embedding_size;
opts.min_ops = tvm_min_ops;
opts.profiling_based_jit = tvm_profiling_based_jit;
opts.debug = debug;
opts.profiling_based_jit = FLAGS_caffe2_tvm_profiling_based_jit;
opts.min_ops = FLAGS_caffe2_tvm_min_ops;
TvmTransformer ts(opts);

// Clean up the external input/output of the net
12 changes: 7 additions & 5 deletions caffe2/opt/tvm_transformer.h
@@ -4,15 +4,13 @@

#include <unordered_set>

C10_DECLARE_bool(caffe2_tvm_profiling_based_jit);

namespace caffe2 {

struct TvmTransformOptions final : public BackendTransformOptions {
explicit TvmTransformOptions() : BackendTransformOptions() {}

// Whether to enable profiling based jit
bool profiling_based_jit{true};
bool profiling_based_jit{false};
};

class CAFFE2_API TvmTransformer final : public BackendTransformerBase {
@@ -78,8 +76,12 @@ CAFFE2_API void tvmTransform(
const std::vector<std::string>& weight_names,
const ShapeInfoMap& shape_hints,
const std::unordered_set<int>& blacklisted_ops,
size_t max_batch_size,
size_t max_seq_size,
int32_t max_batch_size,
int32_t max_seq_size,
int32_t num_embeddings,
int32_t embedding_size,
int32_t tvm_min_ops,
bool tvm_profiling_based_jit,
bool debug);

CAFFE2_API void cleanUpPredictNet(