21
21
#include " llvm/IR/LLVMContext.h"
22
22
#include " llvm/Support/CommandLine.h"
23
23
#include " llvm/Support/ManagedStatic.h"
24
+ #include " llvm/Support/Path.h"
24
25
25
26
#include < vector>
26
27
@@ -32,17 +33,43 @@ static cl::opt<std::string> TrainingLog(
32
33
33
34
static cl::opt<std::string> TFModelUnderTrainingPath (
34
35
" ml-inliner-model-under-training" , cl::Hidden,
35
- cl::desc (" Path to SavedModel from the previous training iteration." ));
36
+ cl::desc (R"( Path to SavedModel from the previous training iteration.
37
+ The directory is also expected to contain a JSON specification of the
38
+ outputs expected to be logged, where the first entry must be the
39
+ inlining decision. The file containing the specification should be
40
+ called output_spec.json. The expected JSON value is an array of
41
+ dictionaries. Each dictionary should have 2 keys:
42
+
43
+ - "tensor_spec", followed by the TensorSpec description of the
44
+ output; and
45
+ - "logging_name", a string indicating the name to use when
46
+ logging the output values.
47
+
48
+ Example:
49
+ [
50
+ {
51
+ "logging_name" : "some_name",
52
+ "tensor_spec" : {
53
+ "name" : "model_name",
54
+ "port" : 0,
55
+ "shape" : [2, 3],
56
+ "type" : "float"
57
+ }
58
+ }
59
+ ]
60
+
61
+ The first value must always correspond to the decision.)" ));
62
+
63
/// Optional explicit path to the output spec JSON file. When left empty, the
/// ModelUnderTrainingRunner constructor falls back to "output_spec.json" inside
/// the directory given by -ml-inliner-model-under-training.
+ static cl::opt<std::string> TFOutputSpecOverride (
64
+ " ml-inliner-output-spec-override" , cl::Hidden,
65
+ cl::desc (" Override the path to the output spec json file. See "
66
+ " -ml-inliner-model-under-training documentation for the "
67
+ " specification of that file." ));
36
68
37
69
/// Prefix prepended to each entry of FeatureNameMap to form the name of the
/// corresponding model input tensor spec (see the ModelUnderTrainingRunner
/// constructor).
static cl::opt<std::string> TFFeedPrefix (" ml-inliner-trained-model-feed-prefix" ,
38
70
cl::Hidden, cl::init(" action_" ),
39
71
cl::desc(" Prefix for feature names." ));
40
72
41
- static cl::opt<std::string> TFDecisionName (
42
- " ml-inliner-trained-model-decision-name" , cl::Hidden,
43
- cl::init (" StatefulPartitionedCall" ),
44
- cl::desc(" Name of the graph operation representing the decision." ));
45
-
46
73
namespace {
47
74
// / An InlineEvent, used by TrainingLogger.
48
75
struct InlineEvent {
@@ -69,9 +96,10 @@ struct InlineEvent {
69
96
// / Because this is a protobuf, we cannot just stream the events as they come.
70
97
// / Internally, TrainingLogger stores data in column-major format, because that
71
98
// / lines up with how TF SequenceExample represents it.
99
+ class ModelUnderTrainingRunner ;
72
100
class TrainingLogger final {
73
101
public:
74
- TrainingLogger (StringRef LogFileName);
102
+ TrainingLogger (StringRef LogFileName, const ModelUnderTrainingRunner *MUTR );
75
103
76
104
// / Log one inlining event.
77
105
void logInlineEvent (const InlineEvent &Event,
@@ -157,9 +185,13 @@ class TrainingLogger final {
157
185
}
158
186
159
187
StringRef LogFileName;
188
+ const ModelUnderTrainingRunner *const MUTR;
160
189
std::vector<InlineFeatures> Features;
161
190
std::vector<int64_t > DefaultDecisions;
162
- std::vector<int64_t > Decisions;
191
+ // We store all outputs as data blobs, but we always expect to have one, the
192
+ // first one, representing the decision. While we could track that separately,
193
+ // for uniformity, we store it, generically, here.
194
+ std::vector<std::vector<char >> Outputs;
163
195
std::vector<bool > Effects;
164
196
std::vector<int64_t > Rewards;
165
197
};
@@ -336,8 +368,22 @@ class ModelUnderTrainingRunner final : public MLModelRunner {
336
368
int64_t getFeature (int Index) const override ;
337
369
bool isValid () const { return !!Evaluator; }
338
370
371
+ const std::vector<std::string> outputNames () const { return OutputNames; }
372
+
373
+ const std::vector<TensorSpec> outputSpecs () const { return OutputSpecs; }
374
+
375
/// Result stored by the most recent call to run(); holds no value if that
/// evaluation failed.
+ const Optional<TFModelEvaluator::EvaluationResult> &
376
+ lastEvaluationResult () const {
377
+ return LastEvaluationResult;
378
+ }
379
+
339
380
private:
340
381
std::unique_ptr<TFModelEvaluator> Evaluator;
382
+ std::vector<std::string> OutputNames;
383
+ std::vector<TensorSpec> OutputSpecs;
384
+ Optional<TFModelEvaluator::EvaluationResult> LastEvaluationResult;
385
+
386
+ bool loadOutputSpecs (LLVMContext &Ctx, StringRef FileName);
341
387
342
388
// The training framework needs some additional features.
343
389
const std::vector<TensorSpec> TrainingOnlyFeatures{
@@ -348,10 +394,15 @@ class ModelUnderTrainingRunner final : public MLModelRunner {
348
394
};
349
395
} // namespace
350
396
351
- TrainingLogger::TrainingLogger (StringRef LogFileName)
352
- : LogFileName(LogFileName) {
397
+ TrainingLogger::TrainingLogger (StringRef LogFileName,
398
+ const ModelUnderTrainingRunner *MUTR)
399
+ : LogFileName(LogFileName), MUTR(MUTR) {
353
400
for (size_t I = 0 ; I < NumberOfFeatures; ++I)
354
401
Features.push_back (InlineFeatures ());
402
+
403
+ // The first output is the inlining decision.
404
+ auto OutputCount = MUTR ? MUTR->outputSpecs ().size () : 1 ;
405
+ Outputs.assign (OutputCount, std::vector<char >());
355
406
}
356
407
357
408
// / Log one inlining event.
@@ -360,16 +411,27 @@ void TrainingLogger::logInlineEvent(const InlineEvent &Event,
360
411
for (size_t I = 0 ; I < NumberOfFeatures; ++I)
361
412
Features[I].push_back (ModelRunner.getFeature (I));
362
413
363
- Decisions.push_back (Event.AdvisedDecision );
364
414
Effects.push_back (Event.Effect );
365
415
Rewards.push_back (Event.Reward );
366
416
DefaultDecisions.push_back (Event.DefaultDecision );
417
+ int64_t Advice = static_cast <int64_t >(Event.AdvisedDecision );
418
+ const char *AdviceData = reinterpret_cast <const char *>(&Advice);
419
+ Outputs[0 ].insert (Outputs[0 ].end (), AdviceData, AdviceData + sizeof (int64_t ));
420
+ for (size_t I = 1 ; I < Outputs.size (); ++I) {
421
+ const auto &Result = *MUTR->lastEvaluationResult ();
422
+ auto &Spec = MUTR->outputSpecs ()[I];
423
+ const char *RawData =
424
+ reinterpret_cast <const char *>(Result.getUntypedTensorValue (I));
425
+ Outputs[I].insert (Outputs[I].end (), RawData,
426
+ RawData +
427
+ Spec.getElementCount () * Spec.getElementByteSize ());
428
+ }
367
429
}
368
430
369
431
void TrainingLogger::print () {
370
432
std::error_code EC;
371
433
raw_fd_ostream OutFile (LogFileName, EC);
372
- size_t NumberOfRecords = Decisions .size ();
434
+ size_t NumberOfRecords = Rewards .size ();
373
435
if (NumberOfRecords == 0 )
374
436
return ;
375
437
@@ -383,13 +445,18 @@ void TrainingLogger::print() {
383
445
OutFile, TensorSpec::createSpec<int64_t >(DefaultDecisionName, {1 }),
384
446
DefaultDecisions.data (), NumberOfRecords);
385
447
386
- writeTensorsAsFeatureLists (OutFile,
387
- TensorSpec::createSpec<int64_t >(DecisionName, {1 }),
388
- Decisions .data (), NumberOfRecords);
448
+ writeRawTensorsAsFeatureLists (
449
+ OutFile, TensorSpec::createSpec<int64_t >(DecisionName, {1 }),
450
+ Outputs[ 0 ] .data (), NumberOfRecords);
389
451
writeTensorsAsFeatureLists (OutFile,
390
452
TensorSpec::createSpec<int64_t >(RewardName, {1 }),
391
453
Rewards.data (), NumberOfRecords);
392
454
455
+ for (size_t I = 1 ; I < Outputs.size (); ++I)
456
+ writeRawTensorsAsFeatureLists (OutFile, MUTR->outputSpecs ()[I],
457
+ Outputs[I].data (), NumberOfRecords,
458
+ StringRef (MUTR->outputNames ()[I]));
459
+
393
460
OutFile << " }\n " ;
394
461
}
395
462
@@ -472,13 +539,19 @@ ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
472
539
const std::string &ModelPath)
473
540
: MLModelRunner(Ctx) {
474
541
std::vector<TensorSpec> InputSpecs;
475
- std::vector<TensorSpec> OutputSpecs;
476
542
for (size_t I = 0 ; I < NumberOfFeatures; ++I)
477
543
InputSpecs.push_back (
478
544
TensorSpec::createSpec<int64_t >(TFFeedPrefix + FeatureNameMap[I], {1 }));
479
545
InputSpecs.insert (InputSpecs.end (), TrainingOnlyFeatures.begin (),
480
546
TrainingOnlyFeatures.end ());
481
- OutputSpecs.push_back (TensorSpec::createSpec<int64_t >(TFDecisionName, {1 }));
547
+ SmallVector<char , 128 > OutputSpecsPath;
548
+ StringRef OutputSpecPath = TFOutputSpecOverride;
549
+ if (OutputSpecPath.empty ()) {
550
+ llvm::sys::path::append (OutputSpecsPath, ModelPath, " output_spec.json" );
551
+ OutputSpecPath = {OutputSpecsPath.data (), OutputSpecsPath.size ()};
552
+ }
553
+ if (!loadOutputSpecs (Ctx, OutputSpecPath))
554
+ return ;
482
555
483
556
Evaluator =
484
557
std::make_unique<TFModelEvaluator>(ModelPath, InputSpecs, OutputSpecs);
@@ -489,13 +562,70 @@ ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
489
562
}
490
563
}
491
564
565
+ bool ModelUnderTrainingRunner::loadOutputSpecs (LLVMContext &Ctx,
566
+ StringRef FileName) {
567
+ auto BufferOrError = MemoryBuffer::getFileOrSTDIN (FileName);
568
+ if (!BufferOrError) {
569
+ Ctx.emitError (" Error opening output specs file: " + FileName + " : " +
570
+ BufferOrError.getError ().message ());
571
+ return false ;
572
+ }
573
+ auto ParsedJSONValues = json::parse (BufferOrError.get ()->getBuffer ());
574
+ if (!ParsedJSONValues) {
575
+ Ctx.emitError (" Could not parse specs file: " + FileName);
576
+ return false ;
577
+ }
578
+ auto ValuesArray = ParsedJSONValues->getAsArray ();
579
+ if (!ValuesArray) {
580
+ Ctx.emitError (" Expected an array of {tensor_spec:<TensorSpec>, "
581
+ " logging_name:<name>} dictionaries" );
582
+ return false ;
583
+ }
584
+
585
+ for (const auto &Value : *ValuesArray)
586
+ if (const auto *Obj = Value.getAsObject ())
587
+ if (const auto *SpecPart = Obj->get (" tensor_spec" ))
588
+ if (auto TensorSpec = getTensorSpecFromJSON (Ctx, *SpecPart))
589
+ if (auto LoggingName = Obj->getString (" logging_name" )) {
590
+ if (!TensorSpec->isElementType <int64_t >() &&
591
+ !TensorSpec->isElementType <int32_t >() &&
592
+ !TensorSpec->isElementType <float >()) {
593
+ Ctx.emitError (
594
+ " Only int64, int32, and float tensors are supported. "
595
+ " Found unsupported type for tensor named " +
596
+ TensorSpec->name ());
597
+ return false ;
598
+ }
599
+ OutputNames.push_back (LoggingName->str ());
600
+ OutputSpecs.push_back (*TensorSpec);
601
+ }
602
+
603
+ if (ValuesArray->size () != OutputNames.size ()) {
604
+ Ctx.emitError (
605
+ " Unable to parse output spec. It should be a json file containing an "
606
+ " array of dictionaries. Each dictionary must have a 'tensor_spec' key, "
607
+ " with a json object describing a TensorSpec; and a 'logging_name' key, "
608
+ " which is a string to use as name when logging this tensor in the "
609
+ " training log." );
610
+ return false ;
611
+ }
612
+ assert (OutputNames.size () == OutputSpecs.size ());
613
+ if (OutputNames.empty () || OutputNames[0 ] != DecisionName) {
614
+ Ctx.emitError (" The first output spec must describe the decision tensor, "
615
+ " and must have the logging_name " +
616
+ StringRef (DecisionName));
617
+ return false ;
618
+ }
619
+ return true ;
620
+ }
621
+
492
622
bool ModelUnderTrainingRunner::run () {
493
- auto ER = Evaluator->evaluate ();
494
- if (!ER .hasValue ()) {
623
+ LastEvaluationResult = Evaluator->evaluate ();
624
+ if (!LastEvaluationResult .hasValue ()) {
495
625
Ctx.emitError (" Error evaluating model." );
496
626
return false ;
497
627
}
498
- int64_t Decision = *ER ->getTensorValue <int64_t >(0 );
628
+ int64_t Decision = *LastEvaluationResult ->getTensorValue <int64_t >(0 );
499
629
return static_cast <bool >(Decision);
500
630
}
501
631
@@ -521,22 +651,24 @@ std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
521
651
}
522
652
523
653
std::unique_ptr<MLModelRunner> Runner;
524
-
654
+ ModelUnderTrainingRunner *MUTRPtr = nullptr ;
525
655
bool IsDoingInference = false ;
526
656
if (TFModelUnderTrainingPath.empty ())
527
657
Runner.reset (new NoInferenceModelRunner (Ctx));
528
658
else {
529
- Runner = std::make_unique<ModelUnderTrainingRunner>(
659
+ auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
530
660
Ctx, TFModelUnderTrainingPath);
531
- if (!Runner ) {
661
+ if (!MUTR || !MUTR-> isValid () ) {
532
662
Ctx.emitError (" Could not load the policy model from the provided path" );
533
663
return nullptr ;
534
664
}
535
665
IsDoingInference = true ;
666
+ MUTRPtr = MUTR.get ();
667
+ Runner = std::move (MUTR);
536
668
}
537
669
std::unique_ptr<TrainingLogger> Logger;
538
670
if (!TrainingLog.empty ())
539
- Logger = std::make_unique<TrainingLogger>(TrainingLog);
671
+ Logger = std::make_unique<TrainingLogger>(TrainingLog, MUTRPtr );
540
672
541
673
return std::make_unique<DevelopmentModeMLInlineAdvisor>(
542
674
M, MAM, std::move (Runner), GetDefaultAdvice, IsDoingInference,
0 commit comments