Lower everything before profiling, allowing for lowered profile to be gathered
jfix71 committed Feb 7, 2019
1 parent f384faf commit 6362974
Showing 10 changed files with 255 additions and 100 deletions.
10 changes: 9 additions & 1 deletion examples/fr2en.cpp
@@ -131,6 +131,7 @@ struct Model {
Placeholder *seqLength_;
Placeholder *output_;
Context ctx;
LoweredNamesMap loweredMap_;

void loadLanguages();
void loadEncoder();
@@ -150,6 +151,10 @@ struct Model {
// operations perform CSE, etc.
::optimize(F_, glow::CompilationMode::Infer);

// Lower everything for profile and log lowered info in loweredMap_. Used
// later when creating quantization infos.
::lower(F_, *EE_.getBackend(), &loweredMap_);

// Instrument the graph to capture profiles for nodes' outputs.
F_ = glow::profileQuantization(ctx, F_);
}
@@ -161,6 +166,9 @@ struct Model {
// the same graph structure.
glow::optimize(F_, CompilationMode::Infer);

// Lower however the backend prefers.
::lower(F_, *EE_.getBackend());

auto quantizationInfos = deserializeFromYaml(loadProfileFileOpt);

// Quantize the graph based on the captured profile.
@@ -410,7 +418,7 @@ void Model::translate(const std::vector<std::string> &batch) {

if (!dumpProfileFileOpt.empty()) {
std::vector<NodeQuantizationInfo> QI =
quantization::generateNodeQuantizationInfos(ctx, F_);
quantization::generateNodeQuantizationInfos(ctx, F_, loweredMap_);
serializeToYaml(dumpProfileFileOpt, QI);
}
}
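Pulled out of the diff context above, the example's new flow fits together roughly as in the sketch below. This is an illustrative reconstruction, not a verbatim excerpt from fr2en.cpp: the free-function names are hypothetical, the members of Model have become parameters, and it assumes the Glow headers the example already includes plus "using namespace glow;".

    // Profiling path: lower *everything*, recording in loweredMap which
    // original node outputs each lowered output replaced, then instrument.
    void compileForProfiling(ExecutionEngine &EE, Function *&F, Context &ctx,
                             LoweredNamesMap &loweredMap) {
      ::optimize(F, glow::CompilationMode::Infer);
      ::lower(F, *EE.getBackend(), &loweredMap);
      // Running translation after this captures per-output profiles into ctx.
      F = glow::profileQuantization(ctx, F);
    }

    // After the instrumented run: generate quantization infos for the lowered
    // nodes in F and, via loweredMap, for the original nodes they replaced,
    // then serialize them for a later quantized build.
    void dumpProfile(Context &ctx, Function *F,
                     const LoweredNamesMap &loweredMap,
                     const std::string &profilePath) {
      std::vector<NodeQuantizationInfo> QI =
          quantization::generateNodeQuantizationInfos(ctx, F, loweredMap);
      serializeToYaml(profilePath, QI);
    }

    // Quantized-inference path: lowering follows the backend's preferences,
    // and the previously captured profile is loaded back in.
    void compileQuantized(ExecutionEngine &EE, Function *F,
                          const std::string &profilePath) {
      glow::optimize(F, CompilationMode::Infer);
      ::lower(F, *EE.getBackend());
      auto quantizationInfos = deserializeFromYaml(profilePath);
      // ... quantize F based on quantizationInfos, as the example does ...
      (void)quantizationInfos;
    }

The key behavioral difference between the two paths is the third argument to lower(): when the map is supplied, the backend's shouldLower() preferences are ignored so the captured profile covers the fully lowered graph.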
13 changes: 10 additions & 3 deletions include/glow/Optimizer/Optimizer.h
@@ -16,6 +16,8 @@
#ifndef GLOW_OPTIMIZER_OPTIMIZER_H
#define GLOW_OPTIMIZER_OPTIMIZER_H

#include "glow/Quantization/Quantization.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"

@@ -39,9 +41,14 @@ void optimize(IRFunction &M, bool shouldShareBuffers);
/// Perform optimizations on the graph representation.
void optimize(Function *F, CompilationMode mode);

/// Lower the high-level neural network operators into low-level linear algebra
/// operators.
void lower(Function *F, const Backend &B);
/// Lower the high-level neural network nodes found in \p F into low-level
/// linear algebra operators. \p B can prevent lowering of a node via \ref
/// Backend::shouldLower(). If \p loweredMap is non-null, then everything
/// is lowered regardless of the preferences of \p B, and \p loweredMap will
/// contain a mapping from the output names of the lowered nodes now found in
/// \p F to the output names of the nodes they were lowered from.
void lower(Function *F, const Backend &B,
LoweredNamesMap *loweredMap = nullptr);

/// Dead code elimination.
void DCE(Function *F);
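In practice the new parameter gives lower() two modes, roughly as in the fragment below (a hedged illustration, not part of this header; it assumes a valid Function *F, an ExecutionEngine EE, and "using namespace glow;" as in the examples):

    // Backend-aware lowering: nodes the backend declines to lower via
    // Backend::shouldLower() are left intact.
    ::lower(F, *EE.getBackend());

    // Profiling mode: passing a LoweredNamesMap forces everything to be
    // lowered; each lowered node's output name maps to the output names of
    // the original node(s) it replaced.
    LoweredNamesMap loweredMap;
    ::lower(F, *EE.getBackend(), &loweredMap);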
18 changes: 14 additions & 4 deletions include/glow/Quantization/Quantization.h
@@ -28,6 +28,12 @@ namespace glow {

class ExecutionEngine;

/// Used to keep track of the origin of lowered Nodes via output names, as
/// determined by NodeQuantizationInfo::generateNodeOutputName(). For example,
/// if some NodeValue X is lowered from some NodeValue Y, then the output name
/// of X is a key that maps to a set of names containing the output name of Y.
using LoweredNamesMap = llvm::StringMap<std::set<std::string>>;

/// Tensor quantization parameters for a given node.
struct NodeQuantizationInfo {
std::string nodeOutputName_;
@@ -53,11 +59,15 @@ struct NodeQuantizationInfo {
namespace quantization {

/// Generate NodeQuantizationInfo for all required nodes from function \p F
/// using the method specified by \p schema and target quantization
/// precision \p quantizationPrecision. Profiling values will be written into
/// context \p ctx.
/// using the method specified by \p schema and target quantization precision
/// \p quantizationPrecision. Profiling values will be written into context
/// \p ctx. \p loweredMap maps from the NodeOutputName of each NodeValue that
/// was created by lowering to the set of original NodeOutputNames it replaced;
/// this map is used to generate infos for the original unlowered NodeValues
/// which no longer exist in \p F.
std::vector<NodeQuantizationInfo> generateNodeQuantizationInfos(
Context &ctx, const Function *F, Schema schema = Schema::Asymmetric,
Context &ctx, const Function *F, const LoweredNamesMap &loweredMap = {},
Schema schema = Schema::Asymmetric,
ElemKind quantizationPrecision = ElemKind::Int8QTy);

/// Quantizes the function \p F into a new unoptimized partially quantized
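To tie the two additions together, a rough usage sketch (not taken from the patch): it assumes a Function *F that was lowered with a LoweredNamesMap, instrumented via profileQuantization(), and run so that ctx already holds captured profiles, with the glow namespace in scope.

    // Each key in loweredMap is the output name of a node produced by
    // lowering; the mapped set holds the output names of the original
    // node(s) it replaced. The map was filled in earlier by
    // ::lower(F, backend, &loweredMap).
    std::vector<NodeQuantizationInfo> infos =
        quantization::generateNodeQuantizationInfos(ctx, F, loweredMap);

    // The trailing defaulted parameters can still be overridden explicitly:
    std::vector<NodeQuantizationInfo> symmetricInfos =
        quantization::generateNodeQuantizationInfos(
            ctx, F, loweredMap, quantization::Schema::Symmetric,
            ElemKind::Int8QTy);

With the map supplied, infos are emitted both for nodes still present in the lowered F and for the unlowered node outputs they replaced, so quantization can later find infos even for nodes that a backend chooses not to lower.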
