322 changes: 322 additions & 0 deletions llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms-avx-256.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,322 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=3 < %s | FileCheck %s

# TODO: Fix the processor resource usage for zero-idiom YMM XOR instructions.
# Those vector XOR instructions should only consume 1cy of JFPU1 (instead
# of 2cy).

# LLVM-MCA-BEGIN ZERO-IDIOM-1

vaddps %ymm0, %ymm0, %ymm1
vxorps %ymm1, %ymm1, %ymm1
vblendps $2, %ymm1, %ymm2, %ymm3

# LLVM-MCA-END

# LLVM-MCA-BEGIN ZERO-IDIOM-2

vaddpd %ymm0, %ymm0, %ymm1
vxorpd %ymm1, %ymm1, %ymm1
vblendpd $2, %ymm1, %ymm2, %ymm3

# LLVM-MCA-END

# LLVM-MCA-BEGIN ZERO-IDIOM-3
vaddps %xmm0, %xmm1, %xmm2
vandnps %xmm2, %xmm2, %xmm3
# LLVM-MCA-END

# LLVM-MCA-BEGIN ZERO-IDIOM-4
vaddps %xmm0, %xmm1, %xmm2
vandnps %xmm2, %xmm2, %xmm3
# LLVM-MCA-END

# CHECK: [0] Code Region - ZERO-IDIOM-1

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 300
# CHECK-NEXT: Total Cycles: 306
# CHECK-NEXT: Total uOps: 600

# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 1.96
# CHECK-NEXT: IPC: 0.98
# CHECK-NEXT: Block RThroughput: 3.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 3 2.00 vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 2 1 1.00 vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2 1 1.00 vblendps $2, %ymm1, %ymm2, %ymm3

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
# CHECK-NEXT: [2] - JDiv
# CHECK-NEXT: [3] - JFPA
# CHECK-NEXT: [4] - JFPM
# CHECK-NEXT: [5] - JFPU0
# CHECK-NEXT: [6] - JFPU1
# CHECK-NEXT: [7] - JLAGU
# CHECK-NEXT: [8] - JMul
# CHECK-NEXT: [9] - JSAGU
# CHECK-NEXT: [10] - JSTC
# CHECK-NEXT: [11] - JVALU0
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 3.00 3.00 3.00 3.00 - - - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vblendps $2, %ymm1, %ymm2, %ymm3

# CHECK: Timeline view:
# CHECK-NEXT: 012
# CHECK-NEXT: Index 0123456789

# CHECK: [0,0] DeeeER . . vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [0,1] .DeE-R . . vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [0,2] . DeE-R . . vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [1,0] . D=eeeER. . vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [1,1] . DeE--R. . vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [1,2] . D=eE-R . vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [2,0] . .DeeeER. vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [2,1] . . D=eER. vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [2,2] . . D=eER vblendps $2, %ymm1, %ymm2, %ymm3

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 1.3 1.3 0.0 vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 1. 3 1.3 1.3 1.0 vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2. 3 1.7 0.3 0.7 vblendps $2, %ymm1, %ymm2, %ymm3

# CHECK: [1] Code Region - ZERO-IDIOM-2

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 300
# CHECK-NEXT: Total Cycles: 306
# CHECK-NEXT: Total uOps: 600

# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 1.96
# CHECK-NEXT: IPC: 0.98
# CHECK-NEXT: Block RThroughput: 3.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 3 2.00 vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 2 1 1.00 vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2 1 1.00 vblendpd $2, %ymm1, %ymm2, %ymm3

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
# CHECK-NEXT: [2] - JDiv
# CHECK-NEXT: [3] - JFPA
# CHECK-NEXT: [4] - JFPM
# CHECK-NEXT: [5] - JFPU0
# CHECK-NEXT: [6] - JFPU1
# CHECK-NEXT: [7] - JLAGU
# CHECK-NEXT: [8] - JMul
# CHECK-NEXT: [9] - JSAGU
# CHECK-NEXT: [10] - JSTC
# CHECK-NEXT: [11] - JVALU0
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 3.00 3.00 3.00 3.00 - - - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vblendpd $2, %ymm1, %ymm2, %ymm3

# CHECK: Timeline view:
# CHECK-NEXT: 012
# CHECK-NEXT: Index 0123456789

# CHECK: [0,0] DeeeER . . vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [0,1] .DeE-R . . vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [0,2] . DeE-R . . vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [1,0] . D=eeeER. . vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [1,1] . DeE--R. . vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [1,2] . D=eE-R . vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [2,0] . .DeeeER. vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [2,1] . . D=eER. vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [2,2] . . D=eER vblendpd $2, %ymm1, %ymm2, %ymm3

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 1.3 1.3 0.0 vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 1. 3 1.3 1.3 1.0 vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2. 3 1.7 0.3 0.7 vblendpd $2, %ymm1, %ymm2, %ymm3

# CHECK: [2] Code Region - ZERO-IDIOM-3

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 200
# CHECK-NEXT: Total Cycles: 105
# CHECK-NEXT: Total uOps: 200

# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 1.90
# CHECK-NEXT: IPC: 1.90
# CHECK-NEXT: Block RThroughput: 1.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 0 0.50 vandnps %xmm2, %xmm2, %xmm3

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
# CHECK-NEXT: [2] - JDiv
# CHECK-NEXT: [3] - JFPA
# CHECK-NEXT: [4] - JFPM
# CHECK-NEXT: [5] - JFPU0
# CHECK-NEXT: [6] - JFPU1
# CHECK-NEXT: [7] - JLAGU
# CHECK-NEXT: [8] - JMul
# CHECK-NEXT: [9] - JSAGU
# CHECK-NEXT: [10] - JSTC
# CHECK-NEXT: [11] - JVALU0
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - - vandnps %xmm2, %xmm2, %xmm3

# CHECK: Timeline view:
# CHECK-NEXT: Index 01234567

# CHECK: [0,0] DeeeER . vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [0,1] D----R . vandnps %xmm2, %xmm2, %xmm3
# CHECK-NEXT: [1,0] .DeeeER. vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [1,1] .D----R. vandnps %xmm2, %xmm2, %xmm3
# CHECK-NEXT: [2,0] . DeeeER vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [2,1] . D----R vandnps %xmm2, %xmm2, %xmm3

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 1.0 1.0 0.0 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1. 3 0.0 0.0 4.0 vandnps %xmm2, %xmm2, %xmm3

# CHECK: [3] Code Region - ZERO-IDIOM-4

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 200
# CHECK-NEXT: Total Cycles: 105
# CHECK-NEXT: Total uOps: 200

# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 1.90
# CHECK-NEXT: IPC: 1.90
# CHECK-NEXT: Block RThroughput: 1.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 0 0.50 vandnps %xmm2, %xmm2, %xmm3

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
# CHECK-NEXT: [2] - JDiv
# CHECK-NEXT: [3] - JFPA
# CHECK-NEXT: [4] - JFPM
# CHECK-NEXT: [5] - JFPU0
# CHECK-NEXT: [6] - JFPU1
# CHECK-NEXT: [7] - JLAGU
# CHECK-NEXT: [8] - JMul
# CHECK-NEXT: [9] - JSAGU
# CHECK-NEXT: [10] - JSTC
# CHECK-NEXT: [11] - JVALU0
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - - vandnps %xmm2, %xmm2, %xmm3

# CHECK: Timeline view:
# CHECK-NEXT: Index 01234567

# CHECK: [0,0] DeeeER . vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [0,1] D----R . vandnps %xmm2, %xmm2, %xmm3
# CHECK-NEXT: [1,0] .DeeeER. vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [1,1] .D----R. vandnps %xmm2, %xmm2, %xmm3
# CHECK-NEXT: [2,0] . DeeeER vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [2,1] . D----R vandnps %xmm2, %xmm2, %xmm3

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 1.0 1.0 0.0 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1. 3 0.0 0.0 4.0 vandnps %xmm2, %xmm2, %xmm3
30 changes: 25 additions & 5 deletions llvm/tools/llvm-mca/lib/InstrBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,9 +424,12 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D);

// Check if this is a dependency breaking instruction.
bool IsDepBreaking = MCIA.isDependencyBreaking(STI, MCI);
// FIXME: this is a temporary hack to identify zero-idioms.
bool IsZeroIdiom = D.isZeroLatency() && IsDepBreaking;
APInt Mask;

unsigned ProcID = STI.getSchedModel().getProcessorID();
bool IsZeroIdiom = MCIA.isZeroIdiom(MCI, Mask, ProcID);
bool IsDepBreaking =
IsZeroIdiom || MCIA.isDependencyBreaking(MCI, Mask, ProcID);

// Initialize Reads first.
for (const ReadDescriptor &RD : D.Reads) {
Expand All @@ -451,8 +454,25 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
assert(RegID > 0 && "Invalid register ID found!");
auto RS = llvm::make_unique<ReadState>(RD, RegID);

if (IsDepBreaking && !RD.isImplicitRead())
RS->setIndependentFromDef();
if (IsDepBreaking) {
// A mask of all zeroes carries no per-operand information. In that
// case, every explicit input operand is marked as independent.
if (Mask.isNullValue()) {
if (!RD.isImplicitRead())
RS->setIndependentFromDef();
} else {
// Check if this register operand is independent according to `Mask`.
// Note that Mask may not have enough bits to describe all explicit and
// implicit input operands. If this register operand doesn't have a
// corresponding bit in Mask, then conservatively assume that it is
// dependent.
if (Mask.getBitWidth() > RD.UseIndex) {
// Okay. This mask describes register use `RD.UseIndex`.
if (Mask[RD.UseIndex])
RS->setIndependentFromDef();
}
}
}
NewIS->getUses().emplace_back(std::move(RS));
}

Expand Down
212 changes: 212 additions & 0 deletions llvm/utils/TableGen/CodeGenSchedule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,221 @@ CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK,
// Check MCInstPredicate definitions.
checkMCInstPredicates();

// Check STIPredicate definitions.
checkSTIPredicates();

// Find STIPredicate definitions for each processor model, and construct
// STIPredicateFunction objects.
collectSTIPredicates();

checkCompleteness();
}

void CodeGenSchedModels::checkSTIPredicates() const {
DenseMap<StringRef, const Record *> Declarations;

// There cannot be multiple declarations with the same name.
const RecVec Decls = Records.getAllDerivedDefinitions("STIPredicateDecl");
for (const Record *R : Decls) {
StringRef Name = R->getValueAsString("Name");
const auto It = Declarations.find(Name);
if (It == Declarations.end()) {
Declarations[Name] = R;
continue;
}

PrintError(R->getLoc(), "STIPredicate " + Name + " multiply declared.");
PrintNote(It->second->getLoc(), "Previous declaration was here.");
PrintFatalError(R->getLoc(), "Invalid STIPredicateDecl found.");
}

// Disallow InstructionEquivalenceClasses with an empty instruction list.
const RecVec Defs =
Records.getAllDerivedDefinitions("InstructionEquivalenceClass");
for (const Record *R : Defs) {
RecVec Opcodes = R->getValueAsListOfDefs("Opcodes");
if (Opcodes.empty()) {
PrintFatalError(R->getLoc(), "Invalid InstructionEquivalenceClass "
"defined with an empty opcode list.");
}
}
}

// Used by function `processSTIPredicate` to construct a mask of machine
// instruction operands.
//
// Returns a default-constructed APInt when `Indices` is empty. Otherwise, the
// returned mask is `max(Indices) + 1` bits wide and has one bit set for every
// index in `Indices`.
//
// The mask is constructed directly at its final width (rather than by
// zero-extending a default-constructed APInt), so that an input whose largest
// index is 0 never requests a same-width extension.
static APInt constructOperandMask(ArrayRef<int64_t> Indices) {
  if (Indices.empty())
    return APInt();

  const int64_t MaxIndex = *std::max_element(Indices.begin(), Indices.end());
  assert(MaxIndex >= 0 && "Invalid negative indices in input!");

  APInt OperandMask(static_cast<unsigned>(MaxIndex) + 1, 0);
  for (const int64_t Index : Indices) {
    assert(Index >= 0 && "Invalid negative indices!");
    OperandMask.setBit(static_cast<unsigned>(Index));
  }

  return OperandMask;
}

// Populates `Fn` with one OpcodeInfo per unique opcode referenced by the
// InstructionEquivalenceClass records of its definitions.
//
// `ProcModelMap` maps a scheduling model record to its processor index. That
// index selects the bit used for the processor in every processor mask built
// here.
//
// Opcodes are handed over to `Fn` in sorted order (see the comparator below),
// so that opcodes with the same constraints end up next to each other.
static void
processSTIPredicate(STIPredicateFunction &Fn,
                    const DenseMap<Record *, unsigned> &ProcModelMap) {
  DenseMap<const Record *, unsigned> Opcode2Index;
  using OpcodeMapPair = std::pair<const Record *, OpcodeInfo>;
  std::vector<OpcodeMapPair> OpcodeMappings;
  std::vector<std::pair<APInt, APInt>> OpcodeMasks;

  DenseMap<const Record *, unsigned> Predicate2Index;
  unsigned NumUniquePredicates = 0;

  // Number unique predicates and opcodes used by InstructionEquivalenceClass
  // definitions. Each unique opcode will be associated with an OpcodeInfo
  // object.
  for (const Record *Def : Fn.getDefinitions()) {
    RecVec Classes = Def->getValueAsListOfDefs("Classes");
    for (const Record *EC : Classes) {
      const Record *Pred = EC->getValueAsDef("Predicate");
      if (Predicate2Index.find(Pred) == Predicate2Index.end())
        Predicate2Index[Pred] = NumUniquePredicates++;

      RecVec Opcodes = EC->getValueAsListOfDefs("Opcodes");
      for (const Record *Opcode : Opcodes) {
        if (Opcode2Index.find(Opcode) == Opcode2Index.end()) {
          Opcode2Index[Opcode] = OpcodeMappings.size();
          OpcodeMappings.emplace_back(Opcode, OpcodeInfo());
        }
      }
    }
  }

  // Initialize vector `OpcodeMasks` with default values. We want to keep track
  // of which processors "use" which opcodes. We also want to be able to
  // identify predicates that are used by different processors for a same
  // opcode.
  // This information is used later on by this algorithm to sort OpcodeMapping
  // elements based on their processor and predicate sets.
  OpcodeMasks.resize(OpcodeMappings.size());
  APInt DefaultProcMask(ProcModelMap.size(), 0);
  APInt DefaultPredMask(NumUniquePredicates, 0);
  for (std::pair<APInt, APInt> &MaskPair : OpcodeMasks)
    MaskPair = std::make_pair(DefaultProcMask, DefaultPredMask);

  // Construct a OpcodeInfo object for every unique opcode declared by an
  // InstructionEquivalenceClass definition.
  for (const Record *Def : Fn.getDefinitions()) {
    RecVec Classes = Def->getValueAsListOfDefs("Classes");
    const Record *SchedModel = Def->getValueAsDef("SchedModel");
    const auto ProcIt = ProcModelMap.find(SchedModel);
    assert(ProcIt != ProcModelMap.end() && "Unknown scheduling model!");
    unsigned ProcIndex = ProcIt->second;
    APInt ProcMask(ProcModelMap.size(), 0);
    ProcMask.setBit(ProcIndex);

    for (const Record *EC : Classes) {
      RecVec Opcodes = EC->getValueAsListOfDefs("Opcodes");

      std::vector<int64_t> OpIndices =
          EC->getValueAsListOfInts("OperandIndices");
      APInt OperandMask = constructOperandMask(OpIndices);

      const Record *Pred = EC->getValueAsDef("Predicate");
      APInt PredMask(NumUniquePredicates, 0);
      PredMask.setBit(Predicate2Index[Pred]);

      for (const Record *Opcode : Opcodes) {
        unsigned OpcodeIdx = Opcode2Index[Opcode];
        // A processor may only mention an opcode once.
        if (OpcodeMasks[OpcodeIdx].first[ProcIndex]) {
          std::string Message =
              "Opcode " + Opcode->getName().str() +
              " used by multiple InstructionEquivalenceClass definitions.";
          PrintFatalError(EC->getLoc(), Message);
        }
        OpcodeMasks[OpcodeIdx].first |= ProcMask;
        OpcodeMasks[OpcodeIdx].second |= PredMask;
        OpcodeInfo &OI = OpcodeMappings[OpcodeIdx].second;

        OI.addPredicateForProcModel(ProcMask, OperandMask, Pred);
      }
    }
  }

  // Three-way comparison of two masks: fewer set bits sort first; on a tie,
  // the mask with more leading zeros sorts first. Returns a negative value if
  // `Lhs` sorts before `Rhs`, a positive value if it sorts after, and zero if
  // this criterion cannot tell them apart.
  auto CompareMasks = [](const APInt &Lhs, const APInt &Rhs) -> int {
    const unsigned LhsPop = Lhs.countPopulation();
    const unsigned RhsPop = Rhs.countPopulation();
    if (LhsPop != RhsPop)
      return LhsPop < RhsPop ? -1 : 1;

    const unsigned LhsZeros = Lhs.countLeadingZeros();
    const unsigned RhsZeros = Rhs.countLeadingZeros();
    if (LhsZeros != RhsZeros)
      return LhsZeros > RhsZeros ? -1 : 1;

    return 0;
  };

  // Sort OpcodeMappings elements based on their CPU and predicate masks.
  // As a last resort, order elements by opcode identifier.
  //
  // Every key is compared lexicographically, and the next key is only
  // consulted on a tie. This makes the comparator a strict weak ordering,
  // which is what the sort algorithm requires.
  llvm::sort(OpcodeMappings.begin(), OpcodeMappings.end(),
             [&](const OpcodeMapPair &Lhs, const OpcodeMapPair &Rhs) {
               unsigned LhsIdx = Opcode2Index[Lhs.first];
               unsigned RhsIdx = Opcode2Index[Rhs.first];
               const std::pair<APInt, APInt> &LhsMasks = OpcodeMasks[LhsIdx];
               const std::pair<APInt, APInt> &RhsMasks = OpcodeMasks[RhsIdx];

               if (int ByProc = CompareMasks(LhsMasks.first, RhsMasks.first))
                 return ByProc < 0;

               if (int ByPred = CompareMasks(LhsMasks.second, RhsMasks.second))
                 return ByPred < 0;

               return LhsIdx < RhsIdx;
             });

  // Now construct opcode groups. Groups are used by the SubtargetEmitter when
  // expanding the body of a STIPredicate function. In particular, each opcode
  // group is expanded into a sequence of labels in a switch statement.
  // It identifies opcodes for which different processors define same predicates
  // and same opcode masks.
  for (OpcodeMapPair &Info : OpcodeMappings)
    Fn.addOpcode(Info.first, std::move(Info.second));
}

/// Groups STIPredicate records by their "Declaration", creating one
/// STIPredicateFunction per distinct declaration, and then processes every
/// function with `processSTIPredicate`.
void CodeGenSchedModels::collectSTIPredicates() {
  // Position in `STIPredicates` of the function built for each declaration.
  DenseMap<const Record *, unsigned> Decl2Index;

  for (const Record *Def : Records.getAllDerivedDefinitions("STIPredicate")) {
    const Record *Decl = Def->getValueAsDef("Declaration");

    auto Pos = Decl2Index.find(Decl);
    if (Pos == Decl2Index.end()) {
      // First definition for this declaration: start a new function.
      const unsigned NewIndex = static_cast<unsigned>(STIPredicates.size());
      Pos = Decl2Index.insert(std::make_pair(Decl, NewIndex)).first;
      STIPredicates.emplace_back(Decl);
    }

    STIPredicates[Pos->second].addDefinition(Def);
  }

  for (STIPredicateFunction &Fn : STIPredicates)
    processSTIPredicate(Fn, ProcModelMap);
}

/// Records that the processors in `CpuMask` constrain this opcode with
/// `Predicate` and `OperandMask`.
///
/// If an entry with the same predicate and operand mask already exists, the
/// processors are merged into that entry. Otherwise a new entry is appended.
void OpcodeInfo::addPredicateForProcModel(const llvm::APInt &CpuMask,
                                          const llvm::APInt &OperandMask,
                                          const Record *Predicate) {
  for (PredicateInfo &Existing : Predicates) {
    if (Existing.Predicate == Predicate &&
        Existing.OperandMask == OperandMask) {
      Existing.ProcModelMask |= CpuMask;
      return;
    }
  }

  Predicates.emplace_back(CpuMask, OperandMask, Predicate);
}

void CodeGenSchedModels::checkMCInstPredicates() const {
RecVec MCPredicates = Records.getAllDerivedDefinitions("TIIPredicate");
if (MCPredicates.empty())
Expand Down
141 changes: 141 additions & 0 deletions llvm/utils/TableGen/CodeGenSchedule.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#ifndef LLVM_UTILS_TABLEGEN_CODEGENSCHEDULE_H
#define LLVM_UTILS_TABLEGEN_CODEGENSCHEDULE_H

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
Expand Down Expand Up @@ -270,6 +271,137 @@ struct CodeGenProcModel {
#endif
};

/// Used to correlate instructions to MCInstPredicates specified by
/// InstructionEquivalenceClass tablegen definitions.
///
/// Example: a XOR of a register with itself is a known zero-idiom for most
/// X86 processors.
///
/// Each processor can use a (potentially different)
/// InstructionEquivalenceClass definition to classify zero-idioms. As a
/// consequence, XORrr is likely to appear in more than one equivalence class
/// (where each class definition is contributed by a different processor), and
/// there is no guarantee that those classes all use the same MCInstPredicate.
/// In other words, the requirements for XORrr to be a zero-idiom may differ
/// from processor to processor.
///
/// A PredicateInfo identifies the subset of processors that specify the same
/// requirements (i.e. the same MCInstPredicate and the same OperandMask) for
/// an instruction opcode. Back to the example: `ProcModelMask` has one bit set
/// for every processor model that sees XORrr as a zero-idiom under the same
/// set of constraints.
///
/// By construction, several PredicateInfo objects may be associated with the
/// same opcode, since different processors may define different constraints
/// on it.
///
/// `OperandMask` can be used as an extra constraint. It may describe
/// conditions that apply only to a subset of the operands of a machine
/// instruction, and that subset may not be the same for all processor models.
struct PredicateInfo {
  llvm::APInt ProcModelMask; // A set of processor model indices.
  llvm::APInt OperandMask;   // An operand mask.
  const Record *Predicate;   // MCInstrPredicate definition.

  PredicateInfo(llvm::APInt CpuMask, llvm::APInt Operands, const Record *Pred)
      : ProcModelMask(CpuMask), OperandMask(Operands), Predicate(Pred) {}

  /// Two objects are equal when all three fields are equal. Fields are
  /// compared in declaration order.
  bool operator==(const PredicateInfo &Other) const {
    if (ProcModelMask != Other.ProcModelMask)
      return false;
    if (OperandMask != Other.OperandMask)
      return false;
    return Predicate == Other.Predicate;
  }
};

/// The list of PredicateInfo objects associated with one opcode.
///
/// An OpcodeInfo is created for every unique opcode listed by the
/// InstructionEquivalenceClass definitions of an STIPredicate. This type is
/// movable but not copyable.
class OpcodeInfo {
  llvm::SmallVector<PredicateInfo, 8> Predicates;

public:
  OpcodeInfo() = default;
  OpcodeInfo(OpcodeInfo &&) = default;
  OpcodeInfo &operator=(OpcodeInfo &&) = default;

  OpcodeInfo(const OpcodeInfo &) = delete;
  OpcodeInfo &operator=(const OpcodeInfo &) = delete;

  /// Returns a view of the predicates recorded so far.
  ArrayRef<PredicateInfo> getPredicates() const { return Predicates; }

  /// Associates `Predicate` and `OperandMask` with the processors in
  /// `CpuMask`.
  void addPredicateForProcModel(const llvm::APInt &CpuMask,
                                const llvm::APInt &OperandMask,
                                const Record *Predicate);
};

/// A set of tablegen instruction definitions that are all subject to the same
/// constraints, where the constraints are described by a single OpcodeInfo.
/// This type can be move-constructed but not copied.
class OpcodeGroup {
  OpcodeInfo Info;
  std::vector<const Record *> Opcodes;

public:
  OpcodeGroup(OpcodeInfo &&OpInfo) : Info(std::move(OpInfo)) {}
  OpcodeGroup(OpcodeGroup &&) = default;

  OpcodeGroup(const OpcodeGroup &) = delete;
  OpcodeGroup &operator=(const OpcodeGroup &) = delete;

  /// Appends `Opcode` to the group. An opcode must not be added twice.
  void addOpcode(const Record *Opcode) {
    assert(std::find(Opcodes.begin(), Opcodes.end(), Opcode) == Opcodes.end() &&
           "Opcode already in set!");
    Opcodes.push_back(Opcode);
  }

  const OpcodeInfo &getOpcodeInfo() const { return Info; }
  ArrayRef<const Record *> getOpcodes() const { return Opcodes; }
};

/// Descriptor of an STIPredicate function. Tablegen backends use it to
/// auto-generate the body of a predicate function as a member of tablegen'd
/// class XXXGenSubtargetInfo.
///
/// It holds the declaration record of the function, the definition records
/// contributed to it, and the opcode groups computed from those definitions.
/// This type can be move-constructed but not copied.
class STIPredicateFunction {
  const Record *FunctionDeclaration;

  std::vector<const Record *> Definitions;
  std::vector<OpcodeGroup> Groups;

public:
  STIPredicateFunction(const Record *Rec) : FunctionDeclaration(Rec) {}
  STIPredicateFunction(STIPredicateFunction &&) = default;

  STIPredicateFunction(const STIPredicateFunction &) = delete;
  STIPredicateFunction &operator=(const STIPredicateFunction &) = delete;

  /// Two functions are compatible when they share the same declaration.
  bool isCompatibleWith(const STIPredicateFunction &Other) const {
    return FunctionDeclaration == Other.FunctionDeclaration;
  }

  void addDefinition(const Record *Def) { Definitions.push_back(Def); }

  /// Adds `OpcodeRec` to the last group if that group has the same list of
  /// predicates as `Info`. Otherwise a new group is started from `Info`.
  void addOpcode(const Record *OpcodeRec, OpcodeInfo &&Info) {
    const bool StartsNewGroup =
        Groups.empty() ||
        Groups.back().getOpcodeInfo().getPredicates() != Info.getPredicates();
    if (StartsNewGroup)
      Groups.emplace_back(std::move(Info));
    Groups.back().addOpcode(OpcodeRec);
  }

  const Record *getDeclaration() const { return FunctionDeclaration; }

  /// Value of field "Name" of the declaration.
  StringRef getName() const {
    return FunctionDeclaration->getValueAsString("Name");
  }

  /// Value of field "DefaultReturnValue" of the declaration.
  const Record *getDefaultReturnPredicate() const {
    return FunctionDeclaration->getValueAsDef("DefaultReturnValue");
  }

  ArrayRef<const Record *> getDefinitions() const { return Definitions; }
  ArrayRef<OpcodeGroup> getGroups() const { return Groups; }
};

/// Top level container for machine model data.
class CodeGenSchedModels {
RecordKeeper &Records;
Expand Down Expand Up @@ -303,6 +435,8 @@ class CodeGenSchedModels {
using InstClassMapTy = DenseMap<Record*, unsigned>;
InstClassMapTy InstrClassMap;

std::vector<STIPredicateFunction> STIPredicates;

public:
CodeGenSchedModels(RecordKeeper& RK, const CodeGenTarget &TGT);

Expand Down Expand Up @@ -430,6 +564,9 @@ class CodeGenSchedModels {
Record *findProcResUnits(Record *ProcResKind, const CodeGenProcModel &PM,
ArrayRef<SMLoc> Loc) const;

ArrayRef<STIPredicateFunction> getSTIPredicates() const {
return STIPredicates;
}
private:
void collectProcModels();

Expand Down Expand Up @@ -467,6 +604,10 @@ class CodeGenSchedModels {

void checkMCInstPredicates() const;

void checkSTIPredicates() const;

void collectSTIPredicates();

void checkCompleteness();

void inferFromRW(ArrayRef<unsigned> OperWrites, ArrayRef<unsigned> OperReads,
Expand Down
155 changes: 155 additions & 0 deletions llvm/utils/TableGen/PredicateExpander.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//

#include "PredicateExpander.h"
#include "CodeGenSchedule.h" // Definition of STIPredicateFunction.

namespace llvm {

Expand Down Expand Up @@ -313,4 +314,158 @@ void PredicateExpander::expandPredicate(raw_ostream &OS, const Record *Rec) {
llvm_unreachable("No known rules to expand this MCInstPredicate");
}

/// Emits the signature of predicate function `Fn`.
///
/// When a definition is being expanded, the name is qualified with the class
/// prefix and the signature ends with an opening brace. Otherwise a
/// declaration is emitted, optionally marked as `override`.
void STIPredicateExpander::expandHeader(raw_ostream &OS,
                                        const STIPredicateFunction &Fn) {
  const Record *Decl = Fn.getDeclaration();
  StringRef Name = Decl->getValueAsString("Name");
  const bool ForMC = shouldExpandForMC();
  const bool IsDefinition = shouldExpandDefinition();

  OS.indent(getIndentLevel() * 2);
  OS << "bool ";
  if (IsDefinition)
    OS << getClassPrefix() << "::";
  OS << Name << "(";

  // The instruction parameter is an MCInst or a MachineInstr, passed either
  // by reference or by pointer.
  if (ForMC)
    OS << "const MCInst ";
  else
    OS << "const MachineInstr ";
  OS << (isByRef() ? "&" : "*") << "MI";

  if (Decl->getValueAsBit("UpdatesOpcodeMask"))
    OS << ", APInt &Mask";

  // Only the MC flavour takes the processor ID as an explicit argument.
  if (ForMC)
    OS << ", unsigned ProcessorID) const ";
  else
    OS << ") const ";

  if (!IsDefinition) {
    if (Decl->getValueAsBit("OverridesBaseClassMember"))
      OS << "override";
    OS << ";\n";
    return;
  }

  OS << "{\n";
}

/// Emits the first statements of the body of predicate function `Fn`.
///
/// Every delegate listed by the declaration is called first; the generated
/// function returns true as soon as a delegate returns true. When not
/// expanding for MC, a local `ProcessorID` is then initialized from the
/// scheduling model. The indentation level is increased by one and is left
/// increased on return.
void STIPredicateExpander::expandPrologue(raw_ostream &OS,
                                          const STIPredicateFunction &Fn) {
  const Record *Decl = Fn.getDeclaration();
  RecVec Delegates = Decl->getValueAsListOfDefs("Delegates");
  const bool ForwardsMask = Decl->getValueAsBit("UpdatesOpcodeMask");
  const bool ForMC = shouldExpandForMC();

  increaseIndentLevel();
  const unsigned BodyIndent = getIndentLevel() * 2;

  for (const Record *Delegate : Delegates) {
    OS.indent(BodyIndent);
    OS << "if (" << Delegate->getValueAsString("Name") << "(MI";
    if (ForwardsMask)
      OS << ", Mask";
    if (ForMC)
      OS << ", ProcessorID";
    OS << "))\n";
    OS.indent(BodyIndent + 2);
    OS << "return true;\n\n";
  }

  // The MC flavour receives ProcessorID as a function argument.
  if (!ForMC) {
    OS.indent(BodyIndent);
    OS << "unsigned ProcessorID = getSchedModel().getProcessorID();\n";
  }
}

/// Emits the code that handles the opcodes of `Group`.
///
/// One `if` statement is generated for every PredicateInfo of the group. The
/// condition matches the processor IDs set in the processor mask; the guarded
/// block optionally updates `Mask` and then returns the expanded predicate.
///
/// NOTE(review): a non-zero operand mask is streamed with the raw_ostream
/// operator for APInt; confirm that the text it produces is the literal that
/// the generated `Mask = ...;` assignment expects.
void STIPredicateExpander::expandOpcodeGroup(raw_ostream &OS, const OpcodeGroup &Group,
                                             bool ShouldUpdateOpcodeMask) {
  for (const PredicateInfo &Info : Group.getOpcodeInfo().getPredicates()) {
    const APInt &Processors = Info.ProcModelMask;

    // Build the condition: `if (ProcessorID == A || ProcessorID == B ...`.
    bool ConditionStarted = false;
    for (unsigned ProcID = 0, End = Processors.getActiveBits(); ProcID < End;
         ++ProcID) {
      if (!Processors[ProcID])
        continue;

      if (ConditionStarted) {
        OS << " || ProcessorID == " << ProcID;
        continue;
      }

      OS.indent(getIndentLevel() * 2);
      OS << "if (ProcessorID == " << ProcID;
      ConditionStarted = true;
    }

    OS << ") {\n";

    increaseIndentLevel();
    OS.indent(getIndentLevel() * 2);
    if (ShouldUpdateOpcodeMask) {
      const APInt &Operands = Info.OperandMask;
      if (Operands.isNullValue())
        OS << "Mask.clearAllBits();\n";
      else
        OS << "Mask = " << Operands << ";\n";
      OS.indent(getIndentLevel() * 2);
    }
    OS << "return ";
    expandPredicate(OS, Info.Predicate);
    OS << ";\n";
    decreaseIndentLevel();

    OS.indent(getIndentLevel() * 2);
    OS << "}\n";
  }
}

/// Emits a switch statement on the opcode of `MI`.
///
/// The `default` label simply breaks out of the switch. Every opcode group of
/// `Fn` contributes a run of `case` labels (one per opcode), followed by the
/// code generated by `expandOpcodeGroup` and a final `break`.
void STIPredicateExpander::expandBody(raw_ostream &OS,
                                      const STIPredicateFunction &Fn) {
  const bool UpdatesMask =
      Fn.getDeclaration()->getValueAsBit("UpdatesOpcodeMask");

  // Indentation of the switch statement and of its labels.
  const unsigned SwitchIndent = getIndentLevel() * 2;

  OS.indent(SwitchIndent);
  OS << "switch(MI" << (isByRef() ? "." : "->") << "getOpcode()) {\n";
  OS.indent(SwitchIndent);
  OS << "default:\n";
  OS.indent(SwitchIndent);
  OS << " break;";

  for (const OpcodeGroup &Group : Fn.getGroups()) {
    for (const Record *Opcode : Group.getOpcodes()) {
      OS << '\n';
      OS.indent(SwitchIndent);
      OS << "case " << getTargetName() << "::" << Opcode->getName() << ":";
    }
    OS << '\n';

    // The code for a group is nested one level below its labels.
    increaseIndentLevel();
    expandOpcodeGroup(OS, Group, UpdatesMask);
    OS.indent(getIndentLevel() * 2);
    OS << "break;\n";
    decreaseIndentLevel();
  }

  OS.indent(SwitchIndent);
  OS << "}\n";
}

/// Emits the final `return` statement of predicate function `Fn` (the
/// expansion of its default return predicate), then decreases the indentation
/// level and closes the function body with a comment naming the function.
void STIPredicateExpander::expandEpilogue(raw_ostream &OS,
                                          const STIPredicateFunction &Fn) {
  OS << '\n';
  OS.indent(getIndentLevel() * 2);
  OS << "return ";
  expandPredicate(OS, Fn.getDefaultReturnPredicate());
  OS << ";\n";

  decreaseIndentLevel();
  OS.indent(getIndentLevel() * 2);
  OS << "} // " << getClassPrefix() << "::" << Fn.getName() << "\n\n";
}

/// Emits predicate function `Fn`.
///
/// Nothing is emitted when expanding for MC and the declaration does not set
/// "ExpandForMC". Otherwise the header is always emitted; prologue, body and
/// epilogue are only emitted when a definition is being expanded.
void STIPredicateExpander::expandSTIPredicate(raw_ostream &OS,
                                              const STIPredicateFunction &Fn) {
  if (shouldExpandForMC() &&
      !Fn.getDeclaration()->getValueAsBit("ExpandForMC"))
    return;

  expandHeader(OS, Fn);
  if (!shouldExpandDefinition())
    return;

  expandPrologue(OS, Fn);
  expandBody(OS, Fn);
  expandEpilogue(OS, Fn);
}

} // namespace llvm
33 changes: 32 additions & 1 deletion llvm/utils/TableGen/PredicateExpander.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,15 @@ class PredicateExpander {
/// Returns the current value of the negate-predicate flag.
bool shouldNegate() const { return NegatePredicate; }
/// Returns the current value of the expand-for-MC flag.
bool shouldExpandForMC() const { return ExpandForMC; }
/// Returns the current indentation level.
unsigned getIndentLevel() const { return IndentLevel; }
/// Returns the stored target name.
StringRef getTargetName() const { return TargetName; }

/// Sets the flag reported by isByRef().
void setByRef(bool Value) { EmitCallsByRef = Value; }
/// Inverts the negate-predicate flag.
void flipNegatePredicate() { NegatePredicate = !NegatePredicate; }
/// Sets the negate-predicate flag to \p Value.
void setNegatePredicate(bool Value) { NegatePredicate = Value; }
/// Sets the expand-for-MC flag to \p Value.
void setExpandForMC(bool Value) { ExpandForMC = Value; }
/// Sets the indentation level to \p Level.
// This is the only definition of setIndentLevel: a second, identical
// definition of the same member would be an ill-formed redefinition.
void setIndentLevel(unsigned Level) { IndentLevel = Level; }
/// Raises the indentation level by one.
void increaseIndentLevel() { ++IndentLevel; }
/// Lowers the indentation level by one.
// NOTE(review): there is no guard here against decrementing past zero;
// callers presumably pair this with increaseIndentLevel() - confirm.
void decreaseIndentLevel() { --IndentLevel; }

using RecVec = std::vector<Record *>;
void expandTrue(raw_ostream &OS);
Expand Down Expand Up @@ -81,6 +82,36 @@ class PredicateExpander {
void expandStatement(raw_ostream &OS, const Record *Rec);
};

// Forward declarations.
class STIPredicateFunction;
class OpcodeGroup;

/// A PredicateExpander that emits whole predicate functions described by
/// STIPredicateFunction objects.
///
/// By default only the header is requested for each function; call
/// setExpandDefinition(true) to request the prologue, body and epilogue as
/// well. The class prefix is the qualifier written before "::" and the
/// function name in the closing-brace comment of each emitted definition.
class STIPredicateExpander : public PredicateExpander {
public:
  STIPredicateExpander(StringRef Target) : PredicateExpander(Target) {}

  /// Returns true when full definitions (not just headers) are requested.
  bool shouldExpandDefinition() const { return ExpandDefinition; }
  /// Returns the class prefix; empty unless setClassPrefix() was called.
  StringRef getClassPrefix() const { return ClassPrefix; }
  /// Stores \p S as the class prefix.
  // NOTE(review): the prefix is held as a StringRef, so the underlying
  // string presumably has to outlive this expander - confirm at call sites.
  void setClassPrefix(StringRef S) { ClassPrefix = S; }
  /// Selects between header-only and full-definition expansion.
  void setExpandDefinition(bool Value) { ExpandDefinition = Value; }

  /// Expands \p Fn into \p OS according to the current configuration.
  void expandSTIPredicate(raw_ostream &OS, const STIPredicateFunction &Fn);

private:
  // NOTE(review): both deleted functions take the base-class type, so they
  // are not this class's own copy constructor / copy assignment operator -
  // confirm that this is intended.
  STIPredicateExpander(const PredicateExpander &) = delete;
  STIPredicateExpander &operator=(const PredicateExpander &) = delete;

  // Helpers that each emit one piece of a predicate function.
  void expandHeader(raw_ostream &OS, const STIPredicateFunction &Fn);
  void expandPrologue(raw_ostream &OS, const STIPredicateFunction &Fn);
  void expandOpcodeGroup(raw_ostream &OS, const OpcodeGroup &Group,
                         bool ShouldUpdateOpcodeMask);
  void expandBody(raw_ostream &OS, const STIPredicateFunction &Fn);
  void expandEpilogue(raw_ostream &OS, const STIPredicateFunction &Fn);

  StringRef ClassPrefix;
  bool ExpandDefinition = false;
};

} // namespace llvm

#endif
45 changes: 44 additions & 1 deletion llvm/utils/TableGen/SubtargetEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ class SubtargetEmitter {
void emitSchedModelHelpersImpl(raw_ostream &OS,
bool OnlyExpandMCInstPredicates = false);
void emitGenMCSubtargetInfo(raw_ostream &OS);
void EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS);

void EmitSchedModel(raw_ostream &OS);
void EmitHwModeCheck(const std::string &ClassName, raw_ostream &OS);
Expand Down Expand Up @@ -1672,7 +1673,16 @@ void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
<< " unsigned CPUID) const {\n"
<< " return " << Target << "_MC"
<< "::resolveVariantSchedClassImpl(SchedClass, MI, CPUID);\n"
<< "} // " << ClassName << "::resolveVariantSchedClass\n";
<< "} // " << ClassName << "::resolveVariantSchedClass\n\n";

STIPredicateExpander PE(Target);
PE.setClassPrefix(ClassName);
PE.setExpandDefinition(true);
PE.setByRef(false);
PE.setIndentLevel(0);

for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
PE.expandSTIPredicate(OS, Fn);
}

void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName,
Expand Down Expand Up @@ -1766,6 +1776,31 @@ void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) {
OS << "};\n";
}

/// Emits the MC-flavoured STI predicate functions into \p OS in two
/// preprocessor-guarded sections.
///
/// The first section is guarded by GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS and
/// is produced with the expander in its default (header-only) mode. The second
/// is guarded by GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS and is produced with
/// definitions enabled, "<Target>MCInstrAnalysis" as the class prefix and the
/// indentation level reset to zero. Each guard macro is #undef'ed right after
/// it is tested.
void SubtargetEmitter::EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS) {
  STIPredicateExpander Expander(Target);
  Expander.setExpandForMC(true);
  Expander.setByRef(true);

  // Runs the expander, as currently configured, over every STI predicate.
  auto ExpandAllPredicates = [&]() {
    for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
      Expander.expandSTIPredicate(OS, Fn);
  };

  // Section 1: header-only expansion.
  OS << "\n#ifdef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n"
     << "#undef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n\n";
  ExpandAllPredicates();
  OS << "#endif // GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n\n";

  // Section 2: full definitions, qualified with the analysis class name.
  OS << "\n#ifdef GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n"
     << "#undef GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n\n";

  // Kept in a local so the string outlives every use by the expander below.
  std::string AnalysisClassName = Target + "MCInstrAnalysis";
  Expander.setExpandDefinition(true);
  Expander.setClassPrefix(AnalysisClassName);
  Expander.setIndentLevel(0);
  ExpandAllPredicates();

  OS << "#endif // GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n\n";
}

//
// SubtargetEmitter::run - Main subtarget enumeration emitter.
//
Expand Down Expand Up @@ -1863,6 +1898,12 @@ void SubtargetEmitter::run(raw_ostream &OS) {
<< " const;\n";
if (TGT.getHwModes().getNumModeIds() > 1)
OS << " unsigned getHwMode() const override;\n";

STIPredicateExpander PE(Target);
PE.setByRef(false);
for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
PE.expandSTIPredicate(OS, Fn);

OS << "};\n"
<< "} // end namespace llvm\n\n";

Expand Down Expand Up @@ -1920,6 +1961,8 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "} // end namespace llvm\n\n";

OS << "#endif // GET_SUBTARGETINFO_CTOR\n\n";

EmitMCInstrAnalysisPredicateFunctions(OS);
}

namespace llvm {
Expand Down