[SampleFDO] New hierarchical discriminator for Flow Sensitive SampleFDO

This patch implements the first part of Flow Sensitive SampleFDO (FSAFDO). It has the following changes: (1) disable the current discriminator encoding scheme, (2) add a new hierarchical discriminator for FSAFDO. For this patch, option "-enable-fs-discriminator=true" turns on the new functionality. Option "-enable-fs-discriminator=false" (the default) keeps the current SampleFDO behavior. When the fs-discriminator is enabled, we insert a flag variable, namely, llvm_fs_discriminator, into the object. This symbol will be checked by the create_llvm_prof tool and used to generate a profile with FS-AFDO discriminators enabled. If this happens, for an extbinary format profile, the create_llvm_prof tool will add a flag to the profile summary section. Differential Revision: https://reviews.llvm.org/D102246
llvm · May 18, 2021 · 886629a · 886629a
1 parent ff99fdf
commit 886629a
Show file tree

Hide file tree

Showing 16 changed files with 658 additions and 40 deletions.
diff --git a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h
@@ -0,0 +1,74 @@
+//===----- MIRFSDiscriminator.h: MIR FS Discriminator Support ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the supporting functions for adding Machine level IR
+// Flow Sensitive discriminators to the instruction debug information. With
+// this, a cloned machine instruction in a different MachineBasicBlock will
+// have its own discriminator value. This is done in a MIRAddFSDiscriminators
+// pass.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MIRFSDISCRIMINATOR_H
+#define LLVM_CODEGEN_MIRFSDISCRIMINATOR_H
+
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/ProfileData/SampleProfReader.h"
+
+#include <cassert>
+
+namespace llvm {
+
+class MIRAddFSDiscriminators : public MachineFunctionPass {
+  MachineFunction *MF = nullptr; // Avoid an indeterminate pointer.
+  unsigned LowBit;               // Lowest FS bit used (0-based).
+  unsigned HighBit;              // Highest FS bit used (inclusive).
+
+public:
+  static char ID;
+  /// \p LowBit and \p HighBit are the 0-based, inclusive FS bits used by this
+  /// pass. NOTE(review): the (0, 0) defaults trip the assert below -- confirm.
+  MIRAddFSDiscriminators(unsigned LowBit = 0, unsigned HighBit = 0)
+      : MachineFunctionPass(ID), LowBit(LowBit), HighBit(HighBit) {
+    assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
+  }
+
+  /// getNumFSBBs() - Return the number of machine BBs that have FS samples.
+  unsigned getNumFSBBs();
+
+  /// getNumFSSamples() - Return the number of samples that have flow sensitive
+  /// values.
+  uint64_t getNumFSSamples();
+
+  /// getMachineFunction - Return the current machine function.
+  const MachineFunction *getMachineFunction() const { return MF; }
+
+private:
+  bool runOnMachineFunction(MachineFunction &) override;
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_MIRFSDISCRIMINATOR_H
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
@@ -165,6 +165,9 @@ namespace llvm {
   /// This pass perform post-ra machine sink for COPY instructions.
   extern char &PostRAMachineSinkingID;
 
+  /// This pass adds flow sensitive discriminators.
+  extern char &MIRAddFSDiscriminatorsID;
+
   /// FastRegisterAllocation Pass - This pass register allocates as fast as
   /// possible. It is best suited for debug code where live ranges are short.
   ///
@@ -487,6 +490,10 @@ namespace llvm {
   /// Create IR Type Promotion pass. \see TypePromotion.cpp
   FunctionPass *createTypePromotionPass();
 
+  /// Add Flow Sensitive Discriminators.
+  FunctionPass *createMIRAddFSDiscriminatorsPass(unsigned LowBit,
+                                                 unsigned HighBit);
+
   /// Creates MIR Debugify pass. \see MachineDebugify.cpp
   ModulePass *createDebugifyMachineModulePass();
 

diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -26,6 +26,8 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Discriminator.h"
 #include <cassert>
 #include <climits>
 #include <cstddef>
@@ -60,6 +62,8 @@
 
 namespace llvm {
 
+extern cl::opt<bool> EnableFSDiscriminator;
+
 class DITypeRefArray {
   const MDTuple *N = nullptr;
 
@@ -1576,46 +1580,13 @@ class DILocation : public MDNode {
                    ShouldCreate);
   }
 
-  /// With a given unsigned int \p U, use up to 13 bits to represent it.
-  /// old_bit 1~5  --> new_bit 1~5
-  /// old_bit 6~12 --> new_bit 7~13
-  /// new_bit_6 is 0 if higher bits (7~13) are all 0
-  static unsigned getPrefixEncodingFromUnsigned(unsigned U) {
-    U &= 0xfff;
-    return U > 0x1f ? (((U & 0xfe0) << 1) | (U & 0x1f) | 0x20) : U;
-  }
-
-  /// Reverse transformation as getPrefixEncodingFromUnsigned.
-  static unsigned getUnsignedFromPrefixEncoding(unsigned U) {
-    if (U & 1)
-      return 0;
-    U >>= 1;
-    return (U & 0x20) ? (((U >> 1) & 0xfe0) | (U & 0x1f)) : (U & 0x1f);
-  }
-
-  /// Returns the next component stored in discriminator.
-  static unsigned getNextComponentInDiscriminator(unsigned D) {
-    if ((D & 1) == 0)
-      return D >> ((D & 0x40) ? 14 : 7);
-    else
-      return D >> 1;
-  }
-
   TempDILocation cloneImpl() const {
     // Get the raw scope/inlinedAt since it is possible to invoke this on
     // a DILocation containing temporary metadata.
     return getTemporary(getContext(), getLine(), getColumn(), getRawScope(),
                         getRawInlinedAt(), isImplicitCode());
   }
 
-  static unsigned encodeComponent(unsigned C) {
-    return (C == 0) ? 1U : (getPrefixEncodingFromUnsigned(C) << 1);
-  }
-
-  static unsigned encodingBits(unsigned C) {
-    return (C == 0) ? 1 : (C > 0x1f ? 14 : 7);
-  }
-
 public:
   // Disallow replacing operands.
   void replaceOperandWith(unsigned I, Metadata *New) = delete;
@@ -1762,8 +1733,20 @@ class DILocation : public MDNode {
   static
   const DILocation *getMergedLocations(ArrayRef<const DILocation *> Locs);
 
+  /// Return the masked discriminator value for an input discriminator value D
+  /// (i.e. zero out the (B+1)-th and above bits of D; B is 0-based).
+  /// Example: an input of (0x1FF, 7) returns 0xFF.
+  static unsigned getMaskedDiscriminator(unsigned D, unsigned B) {
+    return (D & getN1Bits(B));
+  }
+
+  /// Return the bits used for base discriminators.
+  static unsigned getBaseDiscriminatorBits() { return BASE_DIS_BIT_END; }
+
   /// Returns the base discriminator for a given encoded discriminator \p D.
   static unsigned getBaseDiscriminatorFromDiscriminator(unsigned D) {
+    if (EnableFSDiscriminator)
+      return getMaskedDiscriminator(D, getBaseDiscriminatorBits());
     return getUnsignedFromPrefixEncoding(D);
   }
 
@@ -1785,6 +1768,8 @@ class DILocation : public MDNode {
   /// Returns the duplication factor for a given encoded discriminator \p D, or
   /// 1 if no value or 0 is encoded.
   static unsigned getDuplicationFactorFromDiscriminator(unsigned D) {
+    if (EnableFSDiscriminator)
+      return 1;
     D = getNextComponentInDiscriminator(D);
     unsigned Ret = getUnsignedFromPrefixEncoding(D);
     if (Ret == 0)
@@ -2226,6 +2211,14 @@ unsigned DILocation::getCopyIdentifier() const {
 
 Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D) const {
   unsigned BD, DF, CI;
+
+  if (EnableFSDiscriminator) {
+    BD = getBaseDiscriminator();
+    if (D == BD)
+      return this;
+    return cloneWithDiscriminator(D);
+  }
+
   decodeDiscriminator(getDiscriminator(), BD, DF, CI);
   if (D == BD)
     return this;
@@ -2235,6 +2228,8 @@ Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D)
 }
 
 Optional<const DILocation *> DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const {
+  assert(!EnableFSDiscriminator && "FSDiscriminator should not call this.");
+
   DF *= getDuplicationFactor();
   if (DF <= 1)
     return this;

diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
@@ -275,6 +275,7 @@ void initializeLowerSwitchLegacyPassPass(PassRegistry &);
 void initializeLowerTypeTestsPass(PassRegistry&);
 void initializeLowerMatrixIntrinsicsLegacyPassPass(PassRegistry &);
 void initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(PassRegistry &);
+void initializeMIRAddFSDiscriminatorsPass(PassRegistry &);
 void initializeMIRCanonicalizerPass(PassRegistry &);
 void initializeMIRNamerPass(PassRegistry &);
 void initializeMIRPrintingPassPass(PassRegistry&);

diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h
@@ -171,6 +171,9 @@ struct Config {
   bool ShouldDiscardValueNames = true;
   DiagnosticHandlerFunction DiagHandler;
 
+  /// Add FSAFDO discriminators.
+  bool AddFSDiscriminator = false;
+
   /// If this field is set, LTO will write input file paths and symbol
   /// resolutions here in llvm-lto2 command line flag format. This can be
   /// used for testing and for running the LTO pipeline outside of the linker

diff --git a/llvm/include/llvm/Support/Discriminator.h b/llvm/include/llvm/Support/Discriminator.h
@@ -0,0 +1,73 @@
+//===---- llvm/Support/Discriminator.h -- Discriminator Utils ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the constants and utility functions for discriminators.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_DISCRIMINATOR_H
+#define LLVM_SUPPORT_DISCRIMINATOR_H
+
+// Utility functions for encoding / decoding discriminators.
+/// With a given unsigned int \p U, use up to 13 bits to represent it.
+/// old_bit 1~5  --> new_bit 1~5
+/// old_bit 6~12 --> new_bit 7~13
+/// new_bit_6 is 0 if higher bits (7~13) are all 0
+static inline unsigned getPrefixEncodingFromUnsigned(unsigned U) {
+  U &= 0xfff;
+  return U > 0x1f ? (((U & 0xfe0) << 1) | (U & 0x1f) | 0x20) : U;
+}
+
+/// Reverse transformation as getPrefixEncodingFromUnsigned.
+static inline unsigned getUnsignedFromPrefixEncoding(unsigned U) {
+  if (U & 1)
+    return 0;
+  U >>= 1;
+  return (U & 0x20) ? (((U >> 1) & 0xfe0) | (U & 0x1f)) : (U & 0x1f);
+}
+
+/// Drop the leading encoded component of \p D and return the remaining bits.
+static inline unsigned getNextComponentInDiscriminator(unsigned D) {
+  if (D & 1) // Bit 0 set: the leading component occupies a single bit.
+    return D >> 1;
+  const unsigned Width = (D & 0x40) ? 14 : 7; // Bit 6 selects the wide form.
+  return D >> Width;
+}
+
+static inline unsigned encodeComponent(unsigned C) {
+  return (C == 0) ? 1U : (getPrefixEncodingFromUnsigned(C) << 1);
+}
+
+static inline unsigned encodingBits(unsigned C) {
+  return (C == 0) ? 1 : (C > 0x1f ? 14 : 7);
+}
+
+// Some constants used in FS Discriminators.
+#define BASE_DIS_BIT_BEG 0
+#define BASE_DIS_BIT_END 7
+
+#define PASS_1_DIS_BIT_BEG 8
+#define PASS_1_DIS_BIT_END 13
+
+#define PASS_2_DIS_BIT_BEG 14
+#define PASS_2_DIS_BIT_END 19
+
+#define PASS_3_DIS_BIT_BEG 20
+#define PASS_3_DIS_BIT_END 25
+
+#define PASS_LAST_DIS_BIT_BEG 26
+#define PASS_LAST_DIS_BIT_END 31
+
+// Set bits range [0 .. N] to 1. Used in FS Discriminators.
+static inline unsigned getN1Bits(int N) {
+  if (N >= 31)
+    return 0xFFFFFFFF;
+  return (1U << (N + 1)) - 1; // 1U avoids signed overflow at N == 30.
+}
+
+#endif /* LLVM_SUPPORT_DISCRIMINATOR_H */
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
@@ -106,6 +106,7 @@ add_llvm_component_library(LLVMCodeGen
   MachineStripDebug.cpp
   MachineTraceMetrics.cpp
   MachineVerifier.cpp
+  MIRFSDiscriminator.cpp
   MIRYamlMapping.cpp
   ModuloSchedule.cpp
   MultiHazardRecognizer.cpp