Update on "[JIT] Optimize FunctionSchema::checkArg for the Tensor case."

The Tensor case is one of the most common, and the existing check can be made faster. This results in a ~21% improvement on the DeepAndWide model and would improve other models as well.

Before the change:
```
505[ms] 491[ms] 514[ms] 538[ms] 514[ms] 554[ms] 556[ms] 512[ms] 516[ms] 527[ms]
```

After the change:
```
406[ms] 394[ms] 414[ms] 423[ms] 449[ms] 397[ms] 410[ms] 389[ms] 395[ms] 414[ms]
```

Differential Revision: [D24999486](https://our.internmc.facebook.com/intern/diff/D24999486)

[ghstack-poisoned]
pytorch · Nov 16, 2020 · c6bf232 · c6bf232
1 parent 4344256
commit c6bf232
Show file tree

Hide file tree

Showing 4 changed files with 5 additions and 36 deletions.
diff --git a/aten/src/ATen/core/function_schema_inl.h b/aten/src/ATen/core/function_schema_inl.h
@@ -151,6 +151,10 @@ inline void FunctionSchema::checkArg(
     const IValue& value,
     const Argument& argument,
     optional<size_t> pos) const {
+  if (value.isTensor() && argument.type() == TensorType::get()) {
+    // Fast-path for the common case
+    return;
+  }
   if (!value.type()->isSubtypeOf(argument.type())) {
     TORCH_CHECK(
         false,

diff --git a/aten/src/ATen/core/ivalue.h b/aten/src/ATen/core/ivalue.h
@@ -685,10 +685,6 @@ struct CAFFE2_API IValue final {
     return "InvalidTag(" + c10::guts::to_string(static_cast<int>(tag)) + ")";
   }
 
-  uint32_t tagAsInt() const {
-    return static_cast<uint32_t>(tag);
-  }
-
   // generic v.to<at::Tensor>() implementations
   // that can be used in special functions like pop/push
   // that use template meta-programming.

diff --git a/torch/csrc/jit/api/function_impl.cpp b/torch/csrc/jit/api/function_impl.cpp
@@ -45,33 +45,10 @@ c10::intrusive_ptr<c10::ivalue::Future> GraphFunction::runAsync(
   return get_executor().runAsync(stack, std::move(taskLauncher));
 }
 
-size_t GraphFunction::computeInputTypesHash(
-    const std::vector<IValue>& stack) const {
-  // Use an algorithm similar to boost::hash_combine to compute the vector hash
-  size_t r = 0;
-  const size_t magic_number = 0x9e3779b9;
-  for (const IValue& iv : stack) {
-    r ^= std::hash<uint32_t>{}(iv.tagAsInt()) + magic_number + (r << 6) +
-        (r >> 2);
-  }
-  return r;
-}
-
 IValue GraphFunction::operator()(
     std::vector<IValue> stack,
     const Kwargs& kwargs) {
-  bool need_schema_check = true;
-  if (!kwargs.size()) { // Fast path
-    size_t input_types_hash = computeInputTypesHash(stack);
-    if (!schema_checks_cache_.count(input_types_hash)) {
-      getSchema().checkAndNormalizeInputs(stack, kwargs);
-      schema_checks_cache_.insert(input_types_hash);
-    }
-    need_schema_check = false;
-  }
-  if (need_schema_check) {
-    getSchema().checkAndNormalizeInputs(stack, kwargs);
-  }
+  getSchema().checkAndNormalizeInputs(stack, kwargs);
   run(stack);
   return stack.front();
 }

diff --git a/torch/csrc/jit/api/function_impl.h b/torch/csrc/jit/api/function_impl.h
@@ -141,14 +141,6 @@ struct TORCH_API GraphFunction : public Function {
   // mutable because getSchema caches the default schema if one is requested
   // before a call to setSchema
   mutable std::unique_ptr<FunctionSchema> schema_;
-
-  // If we're seeing inputs of the same types over and over again (a frequent
-  // use case when a model is run in inference), and all of the inputs are
-  // positional, we can shortcut expensive schema checks. We do this by
-  // computing a hash of types of the inputs and if that combination of types
-  // has been seen before, we skip the schema check.
-  mutable std::unordered_set<size_t> schema_checks_cache_;
-  size_t computeInputTypesHash(const std::vector<IValue>& stack) const;
 };
 } // namespace jit
 } // namespace torch