[caffe2] Shape inference for UnPackRecords
Summary:
Since UnPackRecords is part of the graph, we need to add shape inference for it to make it work end-to-end with tvm_jit_op. Because the input is packed, shape inference is impossible without shape info for the packed tensors. For context, each packed tensor has shape 1 x num_embeddings x embedding_size, where 1 is the batch size. The corresponding output tensor thus has shape batch_size x num_embeddings x embedding_size after the packed tensors are concatenated along the batch axis. Therefore, two more gflags need to be added:

- caffe2_predictor_num_embeddings
- caffe2_predictor_embedding_size

These gflags are then added to the UnPackRecordsOp in the predict_net as args, passing the info through to c2_frontend so that TVM can do its own shape inference.
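
The diff below only covers the shape-inference side; the code that actually attaches the flag values to the op is not part of this change set. As a rough sketch of the idea (not code from this commit), annotating the predict_net could look like the following, where the helper name annotateUnPackRecords and the argument names "num_embeddings" / "embedding_size" are assumptions, and the gflags are assumed to be int32:

// Illustrative sketch only -- not the code from this commit. The helper name
// and the argument names are assumptions; the real transform may differ.
#include "caffe2/proto/caffe2_pb.h"

#include <c10/util/Flags.h>

// Assumed to be int32 gflags, as named in the summary above.
C10_DECLARE_int32(caffe2_predictor_num_embeddings);
C10_DECLARE_int32(caffe2_predictor_embedding_size);

namespace caffe2 {

// Walk the predict_net and annotate every UnPackRecords op with the embedding
// dimensions so that downstream shape inference (c2_frontend / TVM) can derive
// batch_size x num_embeddings x embedding_size output shapes.
void annotateUnPackRecords(NetDef* predict_net) {
  for (auto& op : *predict_net->mutable_op()) {
    if (op.type() != "UnPackRecords") {
      continue;
    }
    auto* num_embeddings = op.add_arg();
    num_embeddings->set_name("num_embeddings");
    num_embeddings->set_i(FLAGS_caffe2_predictor_num_embeddings);

    auto* embedding_size = op.add_arg();
    embedding_size->set_name("embedding_size");
    embedding_size->set_i(FLAGS_caffe2_predictor_embedding_size);
  }
}

} // namespace caffe2

With purely illustrative numbers, caffe2_predictor_num_embeddings = 100, caffe2_predictor_embedding_size = 64, and a max batch size of 32, each packed tensor would be 1 x 100 x 64 and each unpacked output would be inferred as 32 x 100 x 64.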

Reviewed By: yinghai

Differential Revision: D21286983

fbshipit-source-id: e9a19cb6b564905282a771df2b9d211d5d37dd71
Hao Lu authored and facebook-github-bot committed May 4, 2020
1 parent bd9617d commit e26631b
Showing 4 changed files with 83 additions and 24 deletions.
57 changes: 53 additions & 4 deletions caffe2/opt/bound_shape_inferencer.cc
@@ -157,6 +157,8 @@ void BoundShapeInferencer::InferOps(
op.type() == "HalfToFused4BitRowwiseQuantized" ||
op.type() == "FloatToHalf" || op.type() == "FbGemmPack") {
InferQuantizationTransformation(op);
} else if (op.type() == "UnPackRecords") {
InferUnPackRecords(op);
} else {
InferCommonOp(op);
}
@@ -707,6 +709,54 @@ void BoundShapeInferencer::InferQuantizationTransformation(
current_dim_type_ = previous_dim_type;
}

void BoundShapeInferencer::InferUnPackRecords(const OperatorDef& op) {
  std::vector<TensorShape> input_shapes;
  for (const auto& input : op.input()) {
    const auto it = shape_info_.find(input);
    if (it == shape_info_.end()) {
      LOG(WARNING) << "Cannot find shape info for " << input << ". Skipping "
                   << op.type();
      return;
    }
    input_shapes.emplace_back(it->second.shape);
  }

  std::vector<TensorShape> output_shapes;

  ArgumentHelper helper(op);
  std::vector<std::string> fields =
      helper.GetRepeatedArgument<std::string>("fields");

  const int num_tensors = fields.size();
  if (spec_.max_batch_size == 1 && num_tensors == 1 &&
      input_shapes[0].dims_size() != 1) {
    // Special case of single tensor input
    output_shapes.push_back(input_shapes[0]);
  } else {
    // Input is packed
    TensorShape oshape;
    oshape.add_dims(spec_.max_batch_size);
    oshape.add_dims(spec_.num_embeddings);
    oshape.add_dims(spec_.embedding_length);
    // TODO: how to do this more intelligently
    oshape.set_data_type(TensorProto::FLOAT);
    for (int i = 0; i < num_tensors; i++) {
      output_shapes.push_back(oshape);
    }
  }

  for (int i = 0; i < output_shapes.size(); i++) {
    const auto& shape = output_shapes[i];

    CheckAndSetTensorBoundShape(
        op.output(i),
        setDimTypeWithFirst(current_dim_type_, shape.dims().size()),
        ConvertToVec(shape.dims()),
        output_shapes[i].data_type(),
        false);
  }
}

void BoundShapeInferencer::InferCommonOp(const OperatorDef& op) {
// First, we need to check that all the input shape/types are already
// presented
@@ -726,7 +776,6 @@ void BoundShapeInferencer::InferCommonOp(const OperatorDef& op) {
CAFFE_ENFORCE(schema);
std::vector<TensorShape> output_shapes;
output_shapes = schema->InferTensor(op, input_shapes);
int i = 0;
bool is_quantized =
!(op.type().compare(0, 4, "Int8")) && (op.type() != "Int8Dequantize");
TensorProto::DataType infered_data_type = TensorProto::UNDEFINED;
@@ -754,16 +803,16 @@ void BoundShapeInferencer::InferCommonOp(const OperatorDef& op) {
infered_data_type = TensorProto::FLOAT;
}

for (const auto& shape : output_shapes) {
for (int i = 0; i < output_shapes.size(); i++) {
const auto& shape = output_shapes[i];
if (infered_data_type == TensorProto::UNDEFINED) {
infered_data_type = shape.data_type();
}
if (shape.unknown_shape()) {
++i;
continue;
}
CheckAndSetTensorBoundShape(
op.output(i++),
op.output(i),
setDimTypeWithFirst(current_dim_type_, shape.dims().size()),
ConvertToVec(shape.dims()),
infered_data_type,
14 changes: 13 additions & 1 deletion caffe2/opt/bound_shape_inferencer.h
@@ -17,9 +17,20 @@ namespace caffe2 {
// max_seq_size.
struct CAFFE2_API BoundShapeSpec {
explicit BoundShapeSpec(int64_t b, int64_t q)
: max_batch_size(b), max_seq_size(q) {}
: max_batch_size(b),
max_seq_size(q),
num_embeddings(0),
embedding_length(0) {}
explicit BoundShapeSpec(int64_t b, int64_t q, int64_t n, int64_t e)
: max_batch_size(b),
max_seq_size(q),
num_embeddings(n),
embedding_length(e) {}
int64_t max_batch_size;
int64_t max_seq_size;
// The following two parameters are for shape inference of UnPackRecords
int64_t num_embeddings;
int64_t embedding_length;
};

/// \class A class that does bound shape inference given a C2 net. Depending on
@@ -118,6 +129,7 @@ class CAFFE2_API BoundShapeInferencer : public BoundShapeInferencerBase {
void InferReshape(const OperatorDef& op);
void InferLengthsRangeFill(const OperatorDef& op);
void InferQuantizationTransformation(const OperatorDef& op);
void InferUnPackRecords(const OperatorDef& op);

// Standard shape/type inference using op schema registered shape inference
// function
24 changes: 10 additions & 14 deletions caffe2/opt/tvm_transformer.cc
@@ -1,16 +1,6 @@
#include "caffe2/opt/tvm_transformer.h"
#include "caffe2/opt/backend_cutting.h"

C10_DEFINE_bool(
caffe2_tvm_profiling_based_jit,
false,
"Use profiling based jit for TVM transform");

C10_DEFINE_int32(
caffe2_tvm_min_ops,
8,
"Minimal number of supported ops for the subgraph to be lowered to TVM");

namespace caffe2 {

NetDef TvmTransformer::buildTvmOp(
@@ -298,15 +288,21 @@ void tvmTransform(
const std::vector<std::string>& weight_names,
const ShapeInfoMap& shape_hints,
const std::unordered_set<int>& blacklisted_ops,
size_t max_batch_size,
size_t max_seq_size,
int32_t max_batch_size,
int32_t max_seq_size,
int32_t num_embeddings,
int32_t embedding_size,
int32_t tvm_min_ops,
bool tvm_profiling_based_jit,
bool debug) {
TvmTransformOptions opts;
opts.bound_shape_spec.max_batch_size = max_batch_size;
opts.bound_shape_spec.max_seq_size = max_seq_size;
opts.bound_shape_spec.num_embeddings = num_embeddings;
opts.bound_shape_spec.embedding_length = embedding_size;
opts.min_ops = tvm_min_ops;
opts.profiling_based_jit = tvm_profiling_based_jit;
opts.debug = debug;
opts.profiling_based_jit = FLAGS_caffe2_tvm_profiling_based_jit;
opts.min_ops = FLAGS_caffe2_tvm_min_ops;
TvmTransformer ts(opts);

// Clean up the external input/output of the net
12 changes: 7 additions & 5 deletions caffe2/opt/tvm_transformer.h
@@ -4,15 +4,13 @@

#include <unordered_set>

C10_DECLARE_bool(caffe2_tvm_profiling_based_jit);

namespace caffe2 {

struct TvmTransformOptions final : public BackendTransformOptions {
explicit TvmTransformOptions() : BackendTransformOptions() {}

// Whether to enable profiling based jit
bool profiling_based_jit{true};
bool profiling_based_jit{false};
};

class CAFFE2_API TvmTransformer final : public BackendTransformerBase {
@@ -78,8 +76,12 @@ CAFFE2_API void tvmTransform(
const std::vector<std::string>& weight_names,
const ShapeInfoMap& shape_hints,
const std::unordered_set<int>& blacklisted_ops,
size_t max_batch_size,
size_t max_seq_size,
int32_t max_batch_size,
int32_t max_seq_size,
int32_t num_embeddings,
int32_t embedding_size,
int32_t tvm_min_ops,
bool tvm_profiling_based_jit,
bool debug);

CAFFE2_API void cleanUpPredictNet(