Lower everything before profiling, allowing for lowered profile to be gathered
jfix71 committed Feb 7, 2019
1 parent f384faf commit 6362974
Showing 10 changed files with 255 additions and 100 deletions.
10 changes: 9 additions & 1 deletion examples/fr2en.cpp
@@ -131,6 +131,7 @@ struct Model {
Placeholder *seqLength_;
Placeholder *output_;
Context ctx;
LoweredNamesMap loweredMap_;

void loadLanguages();
void loadEncoder();
@@ -150,6 +151,10 @@ struct Model {
// operations perform CSE, etc.
::optimize(F_, glow::CompilationMode::Infer);

// Lower everything for profile and log lowered info in loweredMap_. Used
// later when creating quantization infos.
::lower(F_, *EE_.getBackend(), &loweredMap_);

// Instrument the graph to capture profiles for nodes' outputs.
F_ = glow::profileQuantization(ctx, F_);
}
@@ -161,6 +166,9 @@ struct Model {
// the same graph structure.
glow::optimize(F_, CompilationMode::Infer);

// Lower however the backend prefers.
::lower(F_, *EE_.getBackend());

auto quantizationInfos = deserializeFromYaml(loadProfileFileOpt);

// Quantize the graph based on the captured profile.
@@ -410,7 +418,7 @@ void Model::translate(const std::vector<std::string> &batch) {

if (!dumpProfileFileOpt.empty()) {
std::vector<NodeQuantizationInfo> QI =
quantization::generateNodeQuantizationInfos(ctx, F_);
quantization::generateNodeQuantizationInfos(ctx, F_, loweredMap_);
serializeToYaml(dumpProfileFileOpt, QI);
}
}
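Pulled out of the diff context above, the example's new flow fits together roughly as in the sketch below. This is an illustrative reconstruction, not a verbatim excerpt from fr2en.cpp: the free-function names are hypothetical, the members of Model have become parameters, and it assumes the Glow headers the example already includes plus "using namespace glow;".

    // Profiling path: lower *everything*, recording in loweredMap which
    // original node outputs each lowered output replaced, then instrument.
    void compileForProfiling(ExecutionEngine &EE, Function *&F, Context &ctx,
                             LoweredNamesMap &loweredMap) {
      ::optimize(F, glow::CompilationMode::Infer);
      ::lower(F, *EE.getBackend(), &loweredMap);
      // Running translation after this captures per-output profiles into ctx.
      F = glow::profileQuantization(ctx, F);
    }

    // After the instrumented run: generate quantization infos for the lowered
    // nodes in F and, via loweredMap, for the original nodes they replaced,
    // then serialize them for a later quantized build.
    void dumpProfile(Context &ctx, Function *F,
                     const LoweredNamesMap &loweredMap,
                     const std::string &profilePath) {
      std::vector<NodeQuantizationInfo> QI =
          quantization::generateNodeQuantizationInfos(ctx, F, loweredMap);
      serializeToYaml(profilePath, QI);
    }

    // Quantized-inference path: lowering follows the backend's preferences,
    // and the previously captured profile is loaded back in.
    void compileQuantized(ExecutionEngine &EE, Function *F,
                          const std::string &profilePath) {
      glow::optimize(F, CompilationMode::Infer);
      ::lower(F, *EE.getBackend());
      auto quantizationInfos = deserializeFromYaml(profilePath);
      // ... quantize F based on quantizationInfos, as the example does ...
      (void)quantizationInfos;
    }

The key behavioral difference between the two paths is the third argument to lower(): when the map is supplied, the backend's shouldLower() preferences are ignored so the captured profile covers the fully lowered graph.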
13 changes: 10 additions & 3 deletions include/glow/Optimizer/Optimizer.h
@@ -16,6 +16,8 @@
#ifndef GLOW_OPTIMIZER_OPTIMIZER_H
#define GLOW_OPTIMIZER_OPTIMIZER_H

#include "glow/Quantization/Quantization.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"

@@ -39,9 +41,14 @@ void optimize(IRFunction &M, bool shouldShareBuffers);
/// Perform optimizations on the graph representation.
void optimize(Function *F, CompilationMode mode);

/// Lower the high-level neural network operators into low-level linear algebra
/// operators.
void lower(Function *F, const Backend &B);
/// Lower the high-level neural network nodes found in \p F into low-level
/// linear algebra operators. \p B can prevent lowering of a node via \ref
/// Backend::shouldLower(). If \p loweredMap is non-null, then everything
/// is lowered regardless of the preferences of \p B, and \p loweredMap will
/// contain a mapping from the output names of the lowered nodes now found in
/// \p F to the output names of the nodes they were lowered from.
void lower(Function *F, const Backend &B,
LoweredNamesMap *loweredMap = nullptr);

/// Dead code elimination.
void DCE(Function *F);
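In practice the new parameter gives lower() two modes, roughly as in the fragment below (a hedged illustration, not part of this header; it assumes a valid Function *F, an ExecutionEngine EE, and "using namespace glow;" as in the examples):

    // Backend-aware lowering: nodes the backend declines to lower via
    // Backend::shouldLower() are left intact.
    ::lower(F, *EE.getBackend());

    // Profiling mode: passing a LoweredNamesMap forces everything to be
    // lowered; each lowered node's output name maps to the output names of
    // the original node(s) it replaced.
    LoweredNamesMap loweredMap;
    ::lower(F, *EE.getBackend(), &loweredMap);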
18 changes: 14 additions & 4 deletions include/glow/Quantization/Quantization.h
@@ -28,6 +28,12 @@ namespace glow {

class ExecutionEngine;

/// Used to keep track of the origin of lowered Nodes via output names, as
/// determined by NodeQuantizationInfo::generateNodeOutputName(). For example,
/// if some NodeValue X is lowered from some NodeValue Y, then the output name
/// of X is a key that maps to a set of names containing the output name of Y.
using LoweredNamesMap = llvm::StringMap<std::set<std::string>>;

/// Tensor quantization parameters for a given node.
struct NodeQuantizationInfo {
std::string nodeOutputName_;
@@ -53,11 +59,15 @@ struct NodeQuantizationInfo {
namespace quantization {

/// Generate NodeQuantizationInfo for all required nodes from function \p F
/// using the method specified by \p schema and target quantization
/// precision \p quantizationPrecision. Profiling values will be written into
/// context \p ctx.
/// using the method specified by \p schema and target quantization precision
/// \p quantizationPrecision. Profiling values will be written into context
/// \p ctx. \p loweredMap maps from the NodeOutputName of each NodeValue that
/// was created by lowering to the set of original NodeOutputNames it replaced;
/// this map is used to generate infos for the original unlowered NodeValues
/// which no longer exist in \p F.
std::vector<NodeQuantizationInfo> generateNodeQuantizationInfos(
Context &ctx, const Function *F, Schema schema = Schema::Asymmetric,
Context &ctx, const Function *F, const LoweredNamesMap &loweredMap = {},
Schema schema = Schema::Asymmetric,
ElemKind quantizationPrecision = ElemKind::Int8QTy);

/// Quantizes the function \p F into a new unoptimized partially quantized
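To tie the two additions together, a rough usage sketch (not taken from the patch): it assumes a Function *F that was lowered with a LoweredNamesMap, instrumented via profileQuantization(), and run so that ctx already holds captured profiles, with the glow namespace in scope.

    // Each key in loweredMap is the output name of a node produced by
    // lowering; the mapped set holds the output names of the original
    // node(s) it replaced. The map was filled in earlier by
    // ::lower(F, backend, &loweredMap).
    std::vector<NodeQuantizationInfo> infos =
        quantization::generateNodeQuantizationInfos(ctx, F, loweredMap);

    // The trailing defaulted parameters can still be overridden explicitly:
    std::vector<NodeQuantizationInfo> symmetricInfos =
        quantization::generateNodeQuantizationInfos(
            ctx, F, loweredMap, quantization::Schema::Symmetric,
            ElemKind::Int8QTy);

With the map supplied, infos are emitted both for nodes still present in the lowered F and for the unlowered node outputs they replaced, so quantization can later find infos even for nodes that a backend chooses not to lower.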
