Skip to content

Commit

Permalink
[x86/SLH] Teach speculative load hardening to correctly harden the
indices used by AVX2 and AVX-512 gather instructions.

The index vector is hardened by broadcasting the predicate state
into a vector register and then or-ing it into the index vector. Because
the vector or instructions do not modify EFLAGS, we don't even have to
worry about EFLAGS here.

I've added a test for all of the gather intrinsics to make sure that we
don't miss one. A particularly interesting creation is the gather
prefetch, which needs to be marked as potentially "loading" to get the
correct behavior. It's a memory access in many ways, and is actually
relevant for SLH. Based on discussion with Craig in review, I've moved
it to be `mayLoad` and `mayStore` rather than generic side effects. This
matches how we model other prefetch instructions.

Many thanks to Craig for the review here.

Differential Revision: https://reviews.llvm.org/D49336

llvm-svn: 337144
  • Loading branch information
chandlerc committed Jul 16, 2018
1 parent ccc8422 commit cdf0add
Show file tree
Hide file tree
Showing 3 changed files with 1,047 additions and 17 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86InstrAVX512.td
Expand Up @@ -9588,7 +9588,7 @@ defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter",
// prefetch
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
RegisterClass KRC, X86MemOperand memop> {
let Predicates = [HasPFI], hasSideEffects = 1 in
let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
!strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
EVEX, EVEX_K, Sched<[WriteLoad]>;
Expand Down
107 changes: 91 additions & 16 deletions llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
Expand Up @@ -1398,29 +1398,104 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
}

for (MachineOperand *Op : HardenOpRegs) {
auto *OpRC = MRI->getRegClass(Op->getReg());

unsigned OpReg = Op->getReg();
auto *OpRC = MRI->getRegClass(OpReg);
unsigned TmpReg = MRI->createVirtualRegister(OpRC);

if (!EFLAGSLive) {
// Merge our potential poison state into the value with an or.
auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg)
.addReg(StateReg)
.addReg(OpReg);
OrI->addRegisterDead(X86::EFLAGS, TRI);
// If this is a vector register, we'll need somewhat custom logic to handle
// hardening it.
if (!Subtarget->hasVLX() && (OpRC->hasSuperClassEq(&X86::VR128RegClass) ||
OpRC->hasSuperClassEq(&X86::VR256RegClass))) {
assert(Subtarget->hasAVX2() && "AVX2-specific register classes!");
bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128RegClass);

// Move our state into a vector register.
// FIXME: We could skip this at the cost of longer encodings with AVX-512
// but that doesn't seem likely worth it.
unsigned VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass);
auto MovI =
BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg)
.addReg(StateReg);
(void)MovI;
++NumInstsInserted;
LLVM_DEBUG(dbgs() << " Inserting mov: "; MovI->dump(); dbgs() << "\n");

// Broadcast it across the vector register.
unsigned VBStateReg = MRI->createVirtualRegister(OpRC);
auto BroadcastI = BuildMI(MBB, InsertPt, Loc,
TII->get(Is128Bit ? X86::VPBROADCASTQrr
: X86::VPBROADCASTQYrr),
VBStateReg)
.addReg(VStateReg);
(void)BroadcastI;
++NumInstsInserted;
LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
dbgs() << "\n");

// Merge our potential poison state into the value with a vector or.
auto OrI =
BuildMI(MBB, InsertPt, Loc,
TII->get(Is128Bit ? X86::VPORrr : X86::VPORYrr), TmpReg)
.addReg(VBStateReg)
.addReg(OpReg);
(void)OrI;
++NumInstsInserted;
LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
} else {
// We need to avoid touching EFLAGS so shift out all but the least
// significant bit using the instruction that doesn't update flags.
auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg)
.addReg(OpReg)
.addReg(StateReg);
(void)ShiftI;
} else if (OpRC->hasSuperClassEq(&X86::VR128XRegClass) ||
OpRC->hasSuperClassEq(&X86::VR256XRegClass) ||
OpRC->hasSuperClassEq(&X86::VR512RegClass)) {
assert(Subtarget->hasAVX512() && "AVX512-specific register classes!");
bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128XRegClass);
bool Is256Bit = OpRC->hasSuperClassEq(&X86::VR256XRegClass);
if (Is128Bit || Is256Bit)
assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!");

// Broadcast our state into a vector register.
unsigned VStateReg = MRI->createVirtualRegister(OpRC);
unsigned BroadcastOp =
Is128Bit ? X86::VPBROADCASTQrZ128r
: Is256Bit ? X86::VPBROADCASTQrZ256r : X86::VPBROADCASTQrZr;
auto BroadcastI =
BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg)
.addReg(StateReg);
(void)BroadcastI;
++NumInstsInserted;
LLVM_DEBUG(dbgs() << " Inserting shrx: "; ShiftI->dump();
LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
dbgs() << "\n");

// Merge our potential poison state into the value with a vector or.
unsigned OrOp = Is128Bit ? X86::VPORQZ128rr
: Is256Bit ? X86::VPORQZ256rr : X86::VPORQZrr;
auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOp), TmpReg)
.addReg(VStateReg)
.addReg(OpReg);
++NumInstsInserted;
LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
} else {
// FIXME: Need to support GR32 here for 32-bit code.
assert(OpRC->hasSuperClassEq(&X86::GR64RegClass) &&
"Not a supported register class for address hardening!");

if (!EFLAGSLive) {
// Merge our potential poison state into the value with an or.
auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg)
.addReg(StateReg)
.addReg(OpReg);
OrI->addRegisterDead(X86::EFLAGS, TRI);
++NumInstsInserted;
LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
} else {
// We need to avoid touching EFLAGS so shift out all but the least
// significant bit using the instruction that doesn't update flags.
auto ShiftI =
BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg)
.addReg(OpReg)
.addReg(StateReg);
(void)ShiftI;
++NumInstsInserted;
LLVM_DEBUG(dbgs() << " Inserting shrx: "; ShiftI->dump();
dbgs() << "\n");
}
}

// Record this register as checked and update the operand.
Expand Down

0 comments on commit cdf0add

Please sign in to comment.