Skip to content

Commit 62fc44c

Browse files
committed
[MLInliner] In development mode, obtain the output specs from a file
Different training algorithms may produce models that, besides the main policy output (i.e. inline/don't inline), produce additional outputs that are necessary for the next training stage. To facilitate this, in development mode, we require the training policy infrastructure to produce a description of the outputs that are interesting to it, in the form of a JSON file. We special-case the first entry in the JSON file as the inlining decision - we care about its value, so we can guide inlining during training - but treat the rest as opaque data that we just copy over to the training log. Differential Revision: https://reviews.llvm.org/D85674
1 parent 819b2d9 commit 62fc44c

File tree

4 files changed

+202
-25
lines changed

4 files changed

+202
-25
lines changed

llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp

Lines changed: 156 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/IR/LLVMContext.h"
2222
#include "llvm/Support/CommandLine.h"
2323
#include "llvm/Support/ManagedStatic.h"
24+
#include "llvm/Support/Path.h"
2425

2526
#include <vector>
2627

@@ -32,17 +33,43 @@ static cl::opt<std::string> TrainingLog(
3233

3334
// Path to the SavedModel evaluated while training. Unless overridden by
// -ml-inliner-output-spec-override, the output spec is looked up as
// output_spec.json inside this directory. The help text below is user-facing:
// it documents the JSON schema of that file.
static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc(R"(Path to SavedModel from the previous training iteration.
The directory is also expected to contain a JSON specification of the
outputs expected to be logged, where the first entry must be the
inlining decision. The file containing the specification should be
called output_spec.json. The expected JSON value is an array of
dictionaries. Each dictionary should have 2 keys:

- "tensor_spec", followed by the TensorSpec description of the
output; and
- "logging_name", a string indicating the name to use when
logging the output values.

Example:
[
  {
    "logging_name" : "some_name",
    "tensor_spec" : {
      "name" : "model_name",
      "port" : 0,
      "shape" : [2, 3],
      "type" : "float"
    }
  }
]

The first value must always correspond to the decision.)"));
62+
63+
static cl::opt<std::string> TFOutputSpecOverride(
64+
"ml-inliner-output-spec-override", cl::Hidden,
65+
cl::desc("Override the path to the output spec json file. See "
66+
"-ml-inliner-model-under-training documentation for the "
67+
"specification of that file."));
3668

3769
static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
3870
cl::Hidden, cl::init("action_"),
3971
cl::desc("Prefix for feature names."));
4072

41-
static cl::opt<std::string> TFDecisionName(
42-
"ml-inliner-trained-model-decision-name", cl::Hidden,
43-
cl::init("StatefulPartitionedCall"),
44-
cl::desc("Name of the graph operation representing the decision."));
45-
4673
namespace {
4774
/// An InlineEvent, used by TrainingLogger.
4875
struct InlineEvent {
@@ -69,9 +96,10 @@ struct InlineEvent {
6996
/// Because this is a protobuf, we cannot just stream the events as they come.
7097
/// Internally, TrainingLogger stores data in column-major format, because that
7198
/// lines up with how TF SequenceExample represents it.
99+
class ModelUnderTrainingRunner;
72100
class TrainingLogger final {
73101
public:
74-
TrainingLogger(StringRef LogFileName);
102+
TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);
75103

76104
/// Log one inlining event.
77105
void logInlineEvent(const InlineEvent &Event,
@@ -157,9 +185,13 @@ class TrainingLogger final {
157185
}
158186

159187
StringRef LogFileName;
188+
const ModelUnderTrainingRunner *const MUTR;
160189
std::vector<InlineFeatures> Features;
161190
std::vector<int64_t> DefaultDecisions;
162-
std::vector<int64_t> Decisions;
191+
// We store all outputs as data blobs, but we always expect to have one, the
192+
// first one, representing the decision. While we could track that separately,
193+
// for uniformity, we store it, generically, here.
194+
std::vector<std::vector<char>> Outputs;
163195
std::vector<bool> Effects;
164196
std::vector<int64_t> Rewards;
165197
};
@@ -336,8 +368,22 @@ class ModelUnderTrainingRunner final : public MLModelRunner {
336368
int64_t getFeature(int Index) const override;
337369
bool isValid() const { return !!Evaluator; }
338370

371+
const std::vector<std::string> outputNames() const { return OutputNames; }
372+
373+
const std::vector<TensorSpec> outputSpecs() const { return OutputSpecs; }
374+
375+
const Optional<TFModelEvaluator::EvaluationResult> &
376+
lastEvaluationResult() const {
377+
return LastEvaluationResult;
378+
}
379+
339380
private:
340381
std::unique_ptr<TFModelEvaluator> Evaluator;
382+
std::vector<std::string> OutputNames;
383+
std::vector<TensorSpec> OutputSpecs;
384+
Optional<TFModelEvaluator::EvaluationResult> LastEvaluationResult;
385+
386+
bool loadOutputSpecs(LLVMContext &Ctx, StringRef FileName);
341387

342388
// The training framework needs some additional features.
343389
const std::vector<TensorSpec> TrainingOnlyFeatures{
@@ -348,10 +394,15 @@ class ModelUnderTrainingRunner final : public MLModelRunner {
348394
};
349395
} // namespace
350396

351-
TrainingLogger::TrainingLogger(StringRef LogFileName)
352-
: LogFileName(LogFileName) {
397+
/// Set up the per-feature columns and the per-output raw data buffers.
/// \p MUTR may be null, in which case only the inlining decision is tracked
/// as an output.
TrainingLogger::TrainingLogger(StringRef LogFileName,
                               const ModelUnderTrainingRunner *MUTR)
    : LogFileName(LogFileName), MUTR(MUTR) {
  // One, initially empty, column per feature.
  for (size_t Remaining = NumberOfFeatures; Remaining > 0; --Remaining)
    Features.emplace_back();

  // Slot 0 always holds the inlining decision; with a model under training
  // there is one slot per output spec.
  size_t NumberOfOutputs = 1;
  if (MUTR)
    NumberOfOutputs = MUTR->outputSpecs().size();
  Outputs.assign(NumberOfOutputs, std::vector<char>());
}
356407

357408
/// Log one inlining event.
@@ -360,16 +411,27 @@ void TrainingLogger::logInlineEvent(const InlineEvent &Event,
360411
for (size_t I = 0; I < NumberOfFeatures; ++I)
361412
Features[I].push_back(ModelRunner.getFeature(I));
362413

363-
Decisions.push_back(Event.AdvisedDecision);
364414
Effects.push_back(Event.Effect);
365415
Rewards.push_back(Event.Reward);
366416
DefaultDecisions.push_back(Event.DefaultDecision);
417+
int64_t Advice = static_cast<int64_t>(Event.AdvisedDecision);
418+
const char *AdviceData = reinterpret_cast<const char *>(&Advice);
419+
Outputs[0].insert(Outputs[0].end(), AdviceData, AdviceData + sizeof(int64_t));
420+
for (size_t I = 1; I < Outputs.size(); ++I) {
421+
const auto &Result = *MUTR->lastEvaluationResult();
422+
auto &Spec = MUTR->outputSpecs()[I];
423+
const char *RawData =
424+
reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
425+
Outputs[I].insert(Outputs[I].end(), RawData,
426+
RawData +
427+
Spec.getElementCount() * Spec.getElementByteSize());
428+
}
367429
}
368430

369431
void TrainingLogger::print() {
370432
std::error_code EC;
371433
raw_fd_ostream OutFile(LogFileName, EC);
372-
size_t NumberOfRecords = Decisions.size();
434+
size_t NumberOfRecords = Rewards.size();
373435
if (NumberOfRecords == 0)
374436
return;
375437

@@ -383,13 +445,18 @@ void TrainingLogger::print() {
383445
OutFile, TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}),
384446
DefaultDecisions.data(), NumberOfRecords);
385447

386-
writeTensorsAsFeatureLists(OutFile,
387-
TensorSpec::createSpec<int64_t>(DecisionName, {1}),
388-
Decisions.data(), NumberOfRecords);
448+
writeRawTensorsAsFeatureLists(
449+
OutFile, TensorSpec::createSpec<int64_t>(DecisionName, {1}),
450+
Outputs[0].data(), NumberOfRecords);
389451
writeTensorsAsFeatureLists(OutFile,
390452
TensorSpec::createSpec<int64_t>(RewardName, {1}),
391453
Rewards.data(), NumberOfRecords);
392454

455+
for (size_t I = 1; I < Outputs.size(); ++I)
456+
writeRawTensorsAsFeatureLists(OutFile, MUTR->outputSpecs()[I],
457+
Outputs[I].data(), NumberOfRecords,
458+
StringRef(MUTR->outputNames()[I]));
459+
393460
OutFile << "}\n";
394461
}
395462

@@ -472,13 +539,19 @@ ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
472539
const std::string &ModelPath)
473540
: MLModelRunner(Ctx) {
474541
std::vector<TensorSpec> InputSpecs;
475-
std::vector<TensorSpec> OutputSpecs;
476542
for (size_t I = 0; I < NumberOfFeatures; ++I)
477543
InputSpecs.push_back(
478544
TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
479545
InputSpecs.insert(InputSpecs.end(), TrainingOnlyFeatures.begin(),
480546
TrainingOnlyFeatures.end());
481-
OutputSpecs.push_back(TensorSpec::createSpec<int64_t>(TFDecisionName, {1}));
547+
SmallVector<char, 128> OutputSpecsPath;
548+
StringRef OutputSpecPath = TFOutputSpecOverride;
549+
if (OutputSpecPath.empty()) {
550+
llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json");
551+
OutputSpecPath = {OutputSpecsPath.data(), OutputSpecsPath.size()};
552+
}
553+
if (!loadOutputSpecs(Ctx, OutputSpecPath))
554+
return;
482555

483556
Evaluator =
484557
std::make_unique<TFModelEvaluator>(ModelPath, InputSpecs, OutputSpecs);
@@ -489,13 +562,70 @@ ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
489562
}
490563
}
491564

565+
/// Load the output specification from the JSON file \p FileName, filling
/// OutputNames and OutputSpecs in lockstep.
///
/// The file must contain an array of dictionaries, each with a "tensor_spec"
/// and a "logging_name" key. Only int64, int32 and float tensors are
/// accepted, and the first entry must be logged under DecisionName.
///
/// \returns true on success. On any failure an error is emitted through
/// \p Ctx and false is returned.
bool ModelUnderTrainingRunner::loadOutputSpecs(LLVMContext &Ctx,
                                               StringRef FileName) {
  auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName);
  if (!BufferOrError) {
    Ctx.emitError("Error opening output specs file: " + FileName + " : " +
                  BufferOrError.getError().message());
    return false;
  }

  auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer());
  if (!ParsedJSONValues) {
    // An Expected in the error state must have its Error taken and handled
    // before it is destroyed; otherwise builds that detect unchecked errors
    // abort here instead of reporting the diagnostic below.
    consumeError(ParsedJSONValues.takeError());
    Ctx.emitError("Could not parse specs file: " + FileName);
    return false;
  }

  auto ValuesArray = ParsedJSONValues->getAsArray();
  if (!ValuesArray) {
    Ctx.emitError("Expected an array of {tensor_spec:<TensorSpec>, "
                  "logging_name:<name>} dictionaries");
    return false;
  }

  // Entries that are malformed are skipped here; the size comparison after
  // the loop turns any skipped entry into a hard error.
  for (const auto &Value : *ValuesArray) {
    const auto *Obj = Value.getAsObject();
    if (!Obj)
      continue;
    const auto *SpecPart = Obj->get("tensor_spec");
    if (!SpecPart)
      continue;
    auto Spec = getTensorSpecFromJSON(Ctx, *SpecPart);
    if (!Spec)
      continue;
    auto LoggingName = Obj->getString("logging_name");
    if (!LoggingName)
      continue;

    if (!Spec->isElementType<int64_t>() && !Spec->isElementType<int32_t>() &&
        !Spec->isElementType<float>()) {
      Ctx.emitError("Only int64, int32, and float tensors are supported. "
                    "Found unsupported type for tensor named " +
                    Spec->name());
      return false;
    }
    OutputNames.push_back(LoggingName->str());
    OutputSpecs.push_back(*Spec);
  }

  if (ValuesArray->size() != OutputNames.size()) {
    Ctx.emitError(
        "Unable to parse output spec. It should be a json file containing an "
        "array of dictionaries. Each dictionary must have a 'tensor_spec' key, "
        "with a json object describing a TensorSpec; and a 'logging_name' key, "
        "which is a string to use as name when logging this tensor in the "
        "training log.");
    return false;
  }
  assert(OutputNames.size() == OutputSpecs.size());

  // The decision is special-cased as output 0 by the rest of this file, so an
  // empty spec or a differently named first entry is rejected.
  if (OutputNames.empty() || OutputNames[0] != DecisionName) {
    Ctx.emitError("The first output spec must describe the decision tensor, "
                  "and must have the logging_name " +
                  StringRef(DecisionName));
    return false;
  }
  return true;
}
621+
492622
bool ModelUnderTrainingRunner::run() {
493-
auto ER = Evaluator->evaluate();
494-
if (!ER.hasValue()) {
623+
LastEvaluationResult = Evaluator->evaluate();
624+
if (!LastEvaluationResult.hasValue()) {
495625
Ctx.emitError("Error evaluating model.");
496626
return false;
497627
}
498-
int64_t Decision = *ER->getTensorValue<int64_t>(0);
628+
int64_t Decision = *LastEvaluationResult->getTensorValue<int64_t>(0);
499629
return static_cast<bool>(Decision);
500630
}
501631

@@ -521,22 +651,24 @@ std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
521651
}
522652

523653
std::unique_ptr<MLModelRunner> Runner;
524-
654+
ModelUnderTrainingRunner *MUTRPtr = nullptr;
525655
bool IsDoingInference = false;
526656
if (TFModelUnderTrainingPath.empty())
527657
Runner.reset(new NoInferenceModelRunner(Ctx));
528658
else {
529-
Runner = std::make_unique<ModelUnderTrainingRunner>(
659+
auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
530660
Ctx, TFModelUnderTrainingPath);
531-
if (!Runner) {
661+
if (!MUTR || !MUTR->isValid()) {
532662
Ctx.emitError("Could not load the policy model from the provided path");
533663
return nullptr;
534664
}
535665
IsDoingInference = true;
666+
MUTRPtr = MUTR.get();
667+
Runner = std::move(MUTR);
536668
}
537669
std::unique_ptr<TrainingLogger> Logger;
538670
if (!TrainingLog.empty())
539-
Logger = std::make_unique<TrainingLogger>(TrainingLog);
671+
Logger = std::make_unique<TrainingLogger>(TrainingLog, MUTRPtr);
540672

541673
return std::make_unique<DevelopmentModeMLInlineAdvisor>(
542674
M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference,
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[
2+
{
3+
"logging_name": "inlining_decision",
4+
"tensor_spec": {
5+
"name": "StatefulPartitionedCall",
6+
"port": 0,
7+
"type": "int64",
8+
"shape": [
9+
1
10+
]
11+
}
12+
}
13+
]
14+
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
[
2+
{
3+
"logging_name": "inlining_decision",
4+
"tensor_spec": {
5+
"name": "StatefulPartitionedCall",
6+
"port": 0,
7+
"type": "int64",
8+
"shape": [
9+
1
10+
]
11+
}
12+
},
13+
{
14+
"logging_name": "fake_extra_output",
15+
"tensor_spec": {
16+
"name": "StatefulPartitionedCall",
17+
"port": 0,
18+
"type": "int64",
19+
"shape": [
20+
1
21+
]
22+
}
23+
}
24+
]
25+

llvm/test/Transforms/Inline/ML/development-training-log.ll

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; Test that we can produce a log if we have or do not have a model, in development mode.
22
; REQUIRES: have_tf_api
33
; RUN: opt -enable-ml-inliner=development -passes=scc-oz-module-inliner -training-log=- -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -S < %s | FileCheck %s
4+
; RUN: opt -enable-ml-inliner=development -passes=scc-oz-module-inliner -training-log=- -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-output-spec-override=%S/Inputs/test_output_spec.json -S < %s | FileCheck %s --check-prefix=EXTRA-OUTPUTS
45
; RUN: opt -enable-ml-inliner=development -passes=scc-oz-module-inliner -training-log=- -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -S < %s | FileCheck %s
56

67
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -48,4 +49,9 @@ define dso_local i32 @top() {
4849
; CHECK-NEXT: key: "delta_size" value: {
4950
; CHECK-NEXT: feature: { int64_list: { value: [0] } }
5051
; CHECK-NEXT: }
51-
; CHECK-NEXT: }
52+
; CHECK-NEXT: }
53+
; CHECK-NOT: fake_extra_output
54+
; EXTRA-OUTPUTS: key: "fake_extra_output" value: {
55+
; EXTRA-OUTPUTS-NEXT: feature: { int64_list: { value: [1] } }
56+
; EXTRA-OUTPUTS-NEXT: }
57+
; EXTRA-OUTPUTS-NEXT: }

0 commit comments

Comments
 (0)