diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt index f9bd233cf8ecf..434a6d2c3553f 100644 --- a/llvm/lib/Target/X86/CMakeLists.txt +++ b/llvm/lib/Target/X86/CMakeLists.txt @@ -31,7 +31,6 @@ set(sources X86CmovConversion.cpp X86CodeGenPassBuilder.cpp X86DomainReassignment.cpp - X86DiscriminateMemOps.cpp X86LowerTileCopy.cpp X86LowerAMXType.cpp X86LowerAMXIntrinsics.cpp @@ -57,7 +56,6 @@ set(sources X86IndirectBranchTracking.cpp X86IndirectThunks.cpp X86InterleavedAccess.cpp - X86InsertPrefetch.cpp X86InstCombineIntrinsic.cpp X86InstrFMA3Info.cpp X86InstrFoldTables.cpp diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 2b83d575ace91..39251f816b9cb 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -154,13 +154,6 @@ FunctionPass *createX86IndirectThunksPass(); /// This pass replaces ret instructions with jmp's to __x86_return thunk. FunctionPass *createX86ReturnThunksPass(); -/// This pass ensures instructions featuring a memory operand -/// have distinctive (with respect to each other) -FunctionPass *createX86DiscriminateMemOpsPass(); - -/// This pass applies profiling information to insert cache prefetches. -FunctionPass *createX86InsertPrefetchPass(); - /// This pass insert wait instruction after X87 instructions which could raise /// fp exceptions when strict-fp enabled. FunctionPass *createX86InsertX87waitPass(); diff --git a/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp b/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp deleted file mode 100644 index bd151a450394a..0000000000000 --- a/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp +++ /dev/null @@ -1,184 +0,0 @@ -//===- X86DiscriminateMemOps.cpp - Unique IDs for Mem Ops -----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// This pass aids profile-driven cache prefetch insertion by ensuring all -/// instructions that have a memory operand are distinguishible from each other. -/// -//===----------------------------------------------------------------------===// - -#include "X86.h" -#include "X86Subtarget.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/ProfileData/SampleProf.h" -#include "llvm/ProfileData/SampleProfReader.h" -#include "llvm/Support/Debug.h" -#include -using namespace llvm; - -#define DEBUG_TYPE "x86-discriminate-memops" - -static cl::opt EnableDiscriminateMemops( - DEBUG_TYPE, cl::init(false), - cl::desc("Generate unique debug info for each instruction with a memory " - "operand. Should be enabled for profile-driven cache prefetching, " - "both in the build of the binary being profiled, as well as in " - "the build of the binary consuming the profile."), - cl::Hidden); - -static cl::opt BypassPrefetchInstructions( - "x86-bypass-prefetch-instructions", cl::init(true), - cl::desc("When discriminating instructions with memory operands, ignore " - "prefetch instructions. This ensures the other memory operand " - "instructions have the same identifiers after inserting " - "prefetches, allowing for successive insertions."), - cl::Hidden); - -namespace { - -using Location = std::pair; - -Location diToLocation(const DILocation *Loc) { - return std::make_pair(Loc->getFilename(), Loc->getLine()); -} - -/// Ensure each instruction having a memory operand has a distinct pair. -void updateDebugInfo(MachineInstr *MI, const DILocation *Loc) { - DebugLoc DL(Loc); - MI->setDebugLoc(DL); -} - -class X86DiscriminateMemOps : public MachineFunctionPass { - bool runOnMachineFunction(MachineFunction &MF) override; - StringRef getPassName() const override { - return "X86 Discriminate Memory Operands"; - } - -public: - static char ID; - - /// Default construct and initialize the pass. - X86DiscriminateMemOps(); -}; - -bool IsPrefetchOpcode(unsigned Opcode) { - return Opcode == X86::PREFETCHNTA || Opcode == X86::PREFETCHT0 || - Opcode == X86::PREFETCHT1 || Opcode == X86::PREFETCHT2 || - Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1 || - Opcode == X86::PREFETCHRST2; -} -} // end anonymous namespace - -//===----------------------------------------------------------------------===// -// Implementation -//===----------------------------------------------------------------------===// - -char X86DiscriminateMemOps::ID = 0; - -/// Default construct and initialize the pass. -X86DiscriminateMemOps::X86DiscriminateMemOps() : MachineFunctionPass(ID) {} - -bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) { - if (!EnableDiscriminateMemops) - return false; - - DISubprogram *FDI = MF.getFunction().getSubprogram(); - if (!FDI || !FDI->getUnit()->getDebugInfoForProfiling()) - return false; - - // Have a default DILocation, if we find instructions with memops that don't - // have any debug info. - const DILocation *ReferenceDI = - DILocation::get(FDI->getContext(), FDI->getLine(), 0, FDI); - assert(ReferenceDI && "ReferenceDI should not be nullptr"); - DenseMap MemOpDiscriminators; - MemOpDiscriminators[diToLocation(ReferenceDI)] = 0; - - // Figure out the largest discriminator issued for each Location. When we - // issue new discriminators, we can thus avoid issuing discriminators - // belonging to instructions that don't have memops. This isn't a requirement - // for the goals of this pass, however, it avoids unnecessary ambiguity. - for (auto &MBB : MF) { - for (auto &MI : MBB) { - const auto &DI = MI.getDebugLoc(); - if (!DI) - continue; - if (BypassPrefetchInstructions && IsPrefetchOpcode(MI.getDesc().Opcode)) - continue; - Location Loc = diToLocation(DI); - unsigned &Disc = MemOpDiscriminators[Loc]; - Disc = std::max(Disc, DI->getBaseDiscriminator()); - } - } - - // Keep track of the discriminators seen at each Location. If an instruction's - // DebugInfo has a Location and discriminator we've already seen, replace its - // discriminator with a new one, to guarantee uniqueness. - DenseMap> Seen; - - bool Changed = false; - for (auto &MBB : MF) { - for (auto &MI : MBB) { - if (X86II::getMemoryOperandNo(MI.getDesc().TSFlags) < 0) - continue; - if (BypassPrefetchInstructions && IsPrefetchOpcode(MI.getDesc().Opcode)) - continue; - const DILocation *DI = MI.getDebugLoc(); - bool HasDebug = DI; - if (!HasDebug) { - DI = ReferenceDI; - } - Location L = diToLocation(DI); - DenseSet &Set = Seen[L]; - const std::pair::iterator, bool> TryInsert = - Set.insert(DI->getBaseDiscriminator()); - if (!TryInsert.second || !HasDebug) { - unsigned BF, DF, CI = 0; - DILocation::decodeDiscriminator(DI->getDiscriminator(), BF, DF, CI); - std::optional EncodedDiscriminator = - DILocation::encodeDiscriminator(MemOpDiscriminators[L] + 1, DF, CI); - - if (!EncodedDiscriminator) { - // FIXME(mtrofin): The assumption is that this scenario is infrequent/OK - // not to support. If evidence points otherwise, we can explore synthesizeing - // unique DIs by adding fake line numbers, or by constructing 64 bit - // discriminators. - LLVM_DEBUG(dbgs() << "Unable to create a unique discriminator " - "for instruction with memory operand in: " - << DI->getFilename() << " Line: " << DI->getLine() - << " Column: " << DI->getColumn() - << ". This is likely due to a large macro expansion. \n"); - continue; - } - // Since we were able to encode, bump the MemOpDiscriminators. - ++MemOpDiscriminators[L]; - DI = DI->cloneWithDiscriminator(*EncodedDiscriminator); - assert(DI && "DI should not be nullptr"); - updateDebugInfo(&MI, DI); - Changed = true; - std::pair::iterator, bool> MustInsert = - Set.insert(DI->getBaseDiscriminator()); - (void)MustInsert; // Silence warning in release build. - assert(MustInsert.second && "New discriminator shouldn't be present in set"); - } - - // Bump the reference DI to avoid cramming discriminators on line 0. - // FIXME(mtrofin): pin ReferenceDI on blocks or first instruction with DI - // in a block. It's more consistent than just relying on the last memop - // instruction we happened to see. - ReferenceDI = DI; - } - } - return Changed; -} - -FunctionPass *llvm::createX86DiscriminateMemOpsPass() { - return new X86DiscriminateMemOps(); -} diff --git a/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/llvm/lib/Target/X86/X86InsertPrefetch.cpp deleted file mode 100644 index 953b755a0ca4c..0000000000000 --- a/llvm/lib/Target/X86/X86InsertPrefetch.cpp +++ /dev/null @@ -1,259 +0,0 @@ -//===------- X86InsertPrefetch.cpp - Insert cache prefetch hints ----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass applies cache prefetch instructions based on a profile. The pass -// assumes DiscriminateMemOps ran immediately before, to ensure debug info -// matches the one used at profile generation time. The profile is encoded in -// afdo format (text or binary). It contains prefetch hints recommendations. -// Each recommendation is made in terms of debug info locations, a type (i.e. -// nta, t{0|1|2}) and a delta. The debug info identifies an instruction with a -// memory operand (see X86DiscriminateMemOps). The prefetch will be made for -// a location at that memory operand + the delta specified in the -// recommendation. -// -//===----------------------------------------------------------------------===// - -#include "X86.h" -#include "X86Subtarget.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Module.h" -#include "llvm/ProfileData/SampleProf.h" -#include "llvm/ProfileData/SampleProfReader.h" -#include "llvm/Support/VirtualFileSystem.h" -#include "llvm/Transforms/IPO/SampleProfile.h" -using namespace llvm; -using namespace sampleprof; - -static cl::opt - PrefetchHintsFile("prefetch-hints-file", - cl::desc("Path to the prefetch hints profile. See also " - "-x86-discriminate-memops"), - cl::Hidden); -namespace { - -class X86InsertPrefetch : public MachineFunctionPass { - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool doInitialization(Module &) override; - - bool runOnMachineFunction(MachineFunction &MF) override; - struct PrefetchInfo { - unsigned InstructionID; - int64_t Delta; - }; - typedef SmallVectorImpl Prefetches; - bool findPrefetchInfo(const FunctionSamples *Samples, const MachineInstr &MI, - Prefetches &prefetches) const; - -public: - static char ID; - X86InsertPrefetch(const std::string &PrefetchHintsFilename); - StringRef getPassName() const override { - return "X86 Insert Cache Prefetches"; - } - -private: - std::string Filename; - std::unique_ptr Reader; -}; - -using PrefetchHints = SampleRecord::CallTargetMap; - -// Return any prefetching hints for the specified MachineInstruction. The hints -// are returned as pairs (name, delta). -ErrorOr -getPrefetchHints(const FunctionSamples *TopSamples, const MachineInstr &MI) { - if (const auto &Loc = MI.getDebugLoc()) - if (const auto *Samples = TopSamples->findFunctionSamples(Loc)) - return Samples->findCallTargetMapAt(FunctionSamples::getOffset(Loc), - Loc->getBaseDiscriminator()); - return std::error_code(); -} - -// The prefetch instruction can't take memory operands involving vector -// registers. -bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) { - Register BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg(); - Register IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg(); - return (BaseReg == 0 || - X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) || - X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg)) && - (IndexReg == 0 || - X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) || - X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)); -} - -} // end anonymous namespace - -//===----------------------------------------------------------------------===// -// Implementation -//===----------------------------------------------------------------------===// - -char X86InsertPrefetch::ID = 0; - -X86InsertPrefetch::X86InsertPrefetch(const std::string &PrefetchHintsFilename) - : MachineFunctionPass(ID), Filename(PrefetchHintsFilename) {} - -/// Return true if the provided MachineInstruction has cache prefetch hints. In -/// that case, the prefetch hints are stored, in order, in the Prefetches -/// vector. -bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples, - const MachineInstr &MI, - Prefetches &Prefetches) const { - assert(Prefetches.empty() && - "Expected caller passed empty PrefetchInfo vector."); - - // There is no point to match prefetch hints if the profile is using MD5. - if (FunctionSamples::UseMD5) - return false; - - static constexpr std::pair HintTypes[] = { - {"_nta_", X86::PREFETCHNTA}, - {"_t0_", X86::PREFETCHT0}, - {"_t1_", X86::PREFETCHT1}, - {"_t2_", X86::PREFETCHT2}, - }; - static const char *SerializedPrefetchPrefix = "__prefetch"; - - auto T = getPrefetchHints(TopSamples, MI); - if (!T) - return false; - int16_t max_index = -1; - // Convert serialized prefetch hints into PrefetchInfo objects, and populate - // the Prefetches vector. - for (const auto &S_V : *T) { - StringRef Name = S_V.first.stringRef(); - if (Name.consume_front(SerializedPrefetchPrefix)) { - int64_t D = static_cast(S_V.second); - unsigned IID = 0; - for (const auto &HintType : HintTypes) { - if (Name.consume_front(HintType.first)) { - IID = HintType.second; - break; - } - } - if (IID == 0) - return false; - uint8_t index = 0; - Name.consumeInteger(10, index); - - if (index >= Prefetches.size()) - Prefetches.resize(index + 1); - Prefetches[index] = {IID, D}; - max_index = std::max(max_index, static_cast(index)); - } - } - assert(max_index + 1 >= 0 && - "Possible overflow: max_index + 1 should be positive."); - assert(static_cast(max_index + 1) == Prefetches.size() && - "The number of prefetch hints received should match the number of " - "PrefetchInfo objects returned"); - return !Prefetches.empty(); -} - -bool X86InsertPrefetch::doInitialization(Module &M) { - if (Filename.empty()) - return false; - - LLVMContext &Ctx = M.getContext(); - // TODO: Propagate virtual file system into LLVM targets. - auto FS = vfs::getRealFileSystem(); - ErrorOr> ReaderOrErr = - SampleProfileReader::create(Filename, Ctx, *FS); - if (std::error_code EC = ReaderOrErr.getError()) { - std::string Msg = "Could not open profile: " + EC.message(); - Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg, - DiagnosticSeverity::DS_Warning)); - return false; - } - Reader = std::move(ReaderOrErr.get()); - Reader->read(); - return true; -} - -void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); -} - -bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) { - if (!Reader) - return false; - const FunctionSamples *Samples = Reader->getSamplesFor(MF.getFunction()); - if (!Samples) - return false; - - bool Changed = false; - - const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - SmallVector Prefetches; - for (auto &MBB : MF) { - for (auto MI = MBB.instr_begin(); MI != MBB.instr_end();) { - auto Current = MI; - ++MI; - - int Offset = X86II::getMemoryOperandNo(Current->getDesc().TSFlags); - if (Offset < 0) - continue; - unsigned Bias = X86II::getOperandBias(Current->getDesc()); - int MemOpOffset = Offset + Bias; - // FIXME(mtrofin): ORE message when the recommendation cannot be taken. - if (!IsMemOpCompatibleWithPrefetch(*Current, MemOpOffset)) - continue; - Prefetches.clear(); - if (!findPrefetchInfo(Samples, *Current, Prefetches)) - continue; - assert(!Prefetches.empty() && - "The Prefetches vector should contain at least a value if " - "findPrefetchInfo returned true."); - for (auto &PrefInfo : Prefetches) { - unsigned PFetchInstrID = PrefInfo.InstructionID; - int64_t Delta = PrefInfo.Delta; - const MCInstrDesc &Desc = TII->get(PFetchInstrID); - MachineInstr *PFetch = - MF.CreateMachineInstr(Desc, Current->getDebugLoc(), true); - MachineInstrBuilder MIB(MF, PFetch); - - static_assert(X86::AddrBaseReg == 0 && X86::AddrScaleAmt == 1 && - X86::AddrIndexReg == 2 && X86::AddrDisp == 3 && - X86::AddrSegmentReg == 4, - "Unexpected change in X86 operand offset order."); - - // This assumes X86::AddBaseReg = 0, {...}ScaleAmt = 1, etc. - // FIXME(mtrofin): consider adding a: - // MachineInstrBuilder::set(unsigned offset, op). - MIB.addReg(Current->getOperand(MemOpOffset + X86::AddrBaseReg).getReg()) - .addImm( - Current->getOperand(MemOpOffset + X86::AddrScaleAmt).getImm()) - .addReg( - Current->getOperand(MemOpOffset + X86::AddrIndexReg).getReg()) - .addImm(Current->getOperand(MemOpOffset + X86::AddrDisp).getImm() + - Delta) - .addReg(Current->getOperand(MemOpOffset + X86::AddrSegmentReg) - .getReg()); - - if (!Current->memoperands_empty()) { - MachineMemOperand *CurrentOp = *(Current->memoperands_begin()); - MIB.addMemOperand(MF.getMachineMemOperand( - CurrentOp, CurrentOp->getOffset() + Delta, CurrentOp->getSize())); - } - - // Insert before Current. This is because Current may clobber some of - // the registers used to describe the input memory operand. - MBB.insert(Current, PFetch); - Changed = true; - } - } - } - return Changed; -} - -FunctionPass *llvm::createX86InsertPrefetchPass() { - return new X86InsertPrefetch(PrefetchHintsFile); -} diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index d4ad98af9b30c..1bcc6f50b375a 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -563,8 +563,6 @@ void X86PassConfig::addPreEmitPass() { addPass(createX86FixupVectorConstants()); } addPass(createX86CompressEVEXPass()); - addPass(createX86DiscriminateMemOpsPass()); - addPass(createX86InsertPrefetchPass()); addPass(createX86InsertX87waitPass()); } diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll index 0fbfb42d2a4dd..78a02b11b17bb 100644 --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -68,8 +68,6 @@ ; CHECK-NEXT: X86 Indirect Branch Tracking ; CHECK-NEXT: X86 vzeroupper inserter ; CHECK-NEXT: Compressing EVEX instrs when possible -; CHECK-NEXT: X86 Discriminate Memory Operands -; CHECK-NEXT: X86 Insert Cache Prefetches ; CHECK-NEXT: X86 insert wait instruction ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: Remove Loads Into Fake Uses diff --git a/llvm/test/CodeGen/X86/discriminate-mem-ops-missing-info.ll b/llvm/test/CodeGen/X86/discriminate-mem-ops-missing-info.ll deleted file mode 100644 index 6bbf3eb307da3..0000000000000 --- a/llvm/test/CodeGen/X86/discriminate-mem-ops-missing-info.ll +++ /dev/null @@ -1,55 +0,0 @@ -; RUN: llc -x86-discriminate-memops < %s | FileCheck %s -; -; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling: -; int sum(int* arr, int pos1, int pos2) { -; return arr[pos1] + arr[pos2]; -; } -; -; ModuleID = 'test.cc' -source_filename = "test.cc" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -declare void @llvm.prefetch(ptr, i32, i32, i32) -; Function Attrs: norecurse nounwind readonly uwtable -define i32 @sum(ptr %arr, i32 %pos1, i32 %pos2) !dbg !7 { -entry: - %idxprom = sext i32 %pos1 to i64 - %arrayidx = getelementptr inbounds i32, ptr %arr, i64 %idxprom - %0 = load i32, ptr %arrayidx, align 4 - %idxprom1 = sext i32 %pos2 to i64 - %arrayidx2 = getelementptr inbounds i32, ptr %arr, i64 %idxprom1 - %1 = load i32, ptr %arrayidx2, align 4 - %add = add nsw i32 %1, %0, !dbg !15 - ret i32 %add -} - -attributes #0 = { "target-cpu"="x86-64" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) -!1 = !DIFile(filename: "test.cc", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 7.0.0 (trunk 322155) (llvm/trunk 322159)"} -!7 = distinct !DISubprogram(name: "sum", linkageName: "sum", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!8 = !DISubroutineType(types: !2) -!9 = !DILocation(line: 2, column: 10, scope: !7) -!10 = !{!11, !11, i64 0} -!11 = !{!"int", !12, i64 0} -!12 = !{!"omnipotent char", !13, i64 0} -!13 = !{!"Simple C++ TBAA"} -!15 = !DILocation(line: 2, column: 20, scope: !7) - - -;CHECK-LABEL: sum: -;CHECK: # %bb.0: -;CHECK: .loc 1 1 0 {{.*}} discriminator 2 -;CHECK-NEXT: movl (%rdi,%rax,4), %eax -;CHECK-NEXT: .loc 1 2 20 -;CHECK-NEXT: addl (%rdi,%rcx,4), %eax diff --git a/llvm/test/CodeGen/X86/discriminate-mem-ops-skip-pfetch.ll b/llvm/test/CodeGen/X86/discriminate-mem-ops-skip-pfetch.ll deleted file mode 100644 index ca412c590b2e3..0000000000000 --- a/llvm/test/CodeGen/X86/discriminate-mem-ops-skip-pfetch.ll +++ /dev/null @@ -1,68 +0,0 @@ -; RUN: llc -x86-discriminate-memops < %s | FileCheck %s -; RUN: llc -x86-discriminate-memops -x86-bypass-prefetch-instructions=0 < %s | FileCheck %s -check-prefix=NOBYPASS -; -; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling: -; int sum(int* arr, int pos1, int pos2) { -; return arr[pos1] + arr[pos2]; -; } -; -; ModuleID = 'test.cc' -source_filename = "test.cc" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -declare void @llvm.prefetch(ptr, i32, i32, i32) -; Function Attrs: norecurse nounwind readonly uwtable -define i32 @sum(ptr %arr, i32 %pos1, i32 %pos2) !dbg !7 { -entry: - %idxprom = sext i32 %pos1 to i64, !dbg !9 - %arrayidx = getelementptr inbounds i32, ptr %arr, i64 %idxprom, !dbg !9 - %0 = load i32, ptr %arrayidx, align 4, !dbg !9, !tbaa !10 - %idxprom1 = sext i32 %pos2 to i64, !dbg !14 - %arrayidx2 = getelementptr inbounds i32, ptr %arr, i64 %idxprom1, !dbg !14 - call void @llvm.prefetch(ptr %arrayidx2, i32 0, i32 3, i32 1) - %1 = load i32, ptr %arrayidx2, align 4, !dbg !14, !tbaa !10 - %add = add nsw i32 %1, %0, !dbg !15 - ret i32 %add, !dbg !16 -} - -attributes #0 = { "target-cpu"="x86-64" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) -!1 = !DIFile(filename: "test.cc", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 7.0.0 (trunk 322155) (llvm/trunk 322159)"} -!7 = distinct !DISubprogram(name: "sum", linkageName: "sum", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!8 = !DISubroutineType(types: !2) -!9 = !DILocation(line: 2, column: 10, scope: !7) -!10 = !{!11, !11, i64 0} -!11 = !{!"int", !12, i64 0} -!12 = !{!"omnipotent char", !13, i64 0} -!13 = !{!"Simple C++ TBAA"} -!14 = !DILocation(line: 2, column: 22, scope: !7) -!15 = !DILocation(line: 2, column: 20, scope: !7) -!16 = !DILocation(line: 2, column: 3, scope: !7) - -;CHECK-LABEL: sum: -;CHECK: # %bb.0: -;CHECK: prefetcht0 (%rdi,%rax,4) -;CHECK-NEXT: movl (%rdi,%rax,4), %eax -;CHECK-NEXT: .loc 1 2 20 discriminator 2 # test.cc:2:20 -;CHECK-NEXT: addl (%rdi,%rcx,4), %eax -;CHECK-NEXT: .loc 1 2 3 # test.cc:2:3 - -;NOBYPASS-LABEL: sum: -;NOBYPASS: # %bb.0: -;NOBYPASS: prefetcht0 (%rdi,%rax,4) -;NOBYPASS-NEXT: .loc 1 2 22 -;NOBYPASS-NEXT: movl (%rdi,%rax,4), %eax -;NOBYPASS-NEXT: .loc 1 2 20 {{.*}} discriminator 2 # test.cc:2:20 -;NOBYPASS-NEXT: addl (%rdi,%rcx,4), %eax -;NOBYPASS-NEXT: .loc 1 2 3 # test.cc:2:3 diff --git a/llvm/test/CodeGen/X86/discriminate-mem-ops.ll b/llvm/test/CodeGen/X86/discriminate-mem-ops.ll deleted file mode 100644 index a8421d9506a87..0000000000000 --- a/llvm/test/CodeGen/X86/discriminate-mem-ops.ll +++ /dev/null @@ -1,55 +0,0 @@ -; RUN: llc -x86-discriminate-memops < %s | FileCheck %s -; -; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling: -; int sum(int* arr, int pos1, int pos2) { -; return arr[pos1] + arr[pos2]; -; } -; -; ModuleID = 'test.cc' -source_filename = "test.cc" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: norecurse nounwind readonly uwtable -define i32 @sum(ptr %arr, i32 %pos1, i32 %pos2) !dbg !7 { -entry: - %idxprom = sext i32 %pos1 to i64, !dbg !9 - %arrayidx = getelementptr inbounds i32, ptr %arr, i64 %idxprom, !dbg !9 - %0 = load i32, ptr %arrayidx, align 4, !dbg !9, !tbaa !10 - %idxprom1 = sext i32 %pos2 to i64, !dbg !14 - %arrayidx2 = getelementptr inbounds i32, ptr %arr, i64 %idxprom1, !dbg !14 - %1 = load i32, ptr %arrayidx2, align 4, !dbg !14, !tbaa !10 - %add = add nsw i32 %1, %0, !dbg !15 - ret i32 %add, !dbg !16 -} - -attributes #0 = { "target-cpu"="x86-64" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) -!1 = !DIFile(filename: "test.cc", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 7.0.0 (trunk 322155) (llvm/trunk 322159)"} -!7 = distinct !DISubprogram(name: "sum", linkageName: "sum", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!8 = !DISubroutineType(types: !2) -!9 = !DILocation(line: 2, column: 10, scope: !7) -!10 = !{!11, !11, i64 0} -!11 = !{!"int", !12, i64 0} -!12 = !{!"omnipotent char", !13, i64 0} -!13 = !{!"Simple C++ TBAA"} -!14 = !DILocation(line: 2, column: 22, scope: !7) -!15 = !DILocation(line: 2, column: 20, scope: !7) -!16 = !DILocation(line: 2, column: 3, scope: !7) - -;CHECK-LABEL: sum: -;CHECK: # %bb.0: -;CHECK: movl (%rdi,%rax,4), %eax -;CHECK-NEXT: .loc 1 2 20 discriminator 2 # test.cc:2:20 -;CHECK-NEXT: addl (%rdi,%rcx,4), %eax -;CHECK-NEXT: .loc 1 2 3 # test.cc:2:3 diff --git a/llvm/test/CodeGen/X86/insert-prefetch-inline.afdo b/llvm/test/CodeGen/X86/insert-prefetch-inline.afdo deleted file mode 100644 index 935b707ff1072..0000000000000 --- a/llvm/test/CodeGen/X86/insert-prefetch-inline.afdo +++ /dev/null @@ -1,4 +0,0 @@ -caller:0:0 - 2: sum:0 - 3: 0 __prefetch_nta_0:23456 - 3.1: 0 __prefetch_nta_0:8764 __prefetch_nta_1:64 \ No newline at end of file diff --git a/llvm/test/CodeGen/X86/insert-prefetch-inline.ll b/llvm/test/CodeGen/X86/insert-prefetch-inline.ll deleted file mode 100644 index 05f542799c08b..0000000000000 --- a/llvm/test/CodeGen/X86/insert-prefetch-inline.ll +++ /dev/null @@ -1,76 +0,0 @@ -; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-inline.afdo | FileCheck %s -; -; Verify we can insert prefetch instructions in code belonging to inlined -; functions. -; -; ModuleID = 'test.cc' - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: norecurse nounwind readonly uwtable -define dso_local i32 @sum(ptr nocapture readonly %arr, i32 %pos1, i32 %pos2) local_unnamed_addr #0 !dbg !7 { -entry: - %idxprom = sext i32 %pos1 to i64, !dbg !10 - %arrayidx = getelementptr inbounds i32, ptr %arr, i64 %idxprom, !dbg !10 - %0 = load i32, ptr %arrayidx, align 4, !dbg !10, !tbaa !11 - %idxprom1 = sext i32 %pos2 to i64, !dbg !15 - %arrayidx2 = getelementptr inbounds i32, ptr %arr, i64 %idxprom1, !dbg !15 - %1 = load i32, ptr %arrayidx2, align 4, !dbg !15, !tbaa !11 - %add = add nsw i32 %1, %0, !dbg !16 - ret i32 %add, !dbg !17 -} - -; "caller" inlines "sum". The associated .afdo file references instructions -; in "caller" that came from "sum"'s inlining. -; -; Function Attrs: norecurse nounwind readonly uwtable -define dso_local i32 @caller(ptr nocapture readonly %arr) local_unnamed_addr #0 !dbg !18 { -entry: - %0 = load i32, ptr %arr, align 4, !dbg !19, !tbaa !11 - %arrayidx2.i = getelementptr inbounds i32, ptr %arr, i64 2, !dbg !21 - %1 = load i32, ptr %arrayidx2.i, align 4, !dbg !21, !tbaa !11 - %add.i = add nsw i32 %1, %0, !dbg !22 - ret i32 %add.i, !dbg !23 -} - -attributes #0 = { "target-cpu"="x86-64" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 7.0.0 (trunk 324940) (llvm/trunk 324941)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) -!1 = !DIFile(filename: "test.cc", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 7.0.0 (trunk 324940) (llvm/trunk 324941)"} -!7 = distinct !DISubprogram(name: "sum", linkageName: "sum", scope: !8, file: !8, line: 3, type: !9, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!8 = !DIFile(filename: "./test.h", directory: "/tmp") -!9 = !DISubroutineType(types: !2) -!10 = !DILocation(line: 6, column: 10, scope: !7) -!11 = !{!12, !12, i64 0} -!12 = !{!"int", !13, i64 0} -!13 = !{!"omnipotent char", !14, i64 0} -!14 = !{!"Simple C++ TBAA"} -!15 = !DILocation(line: 6, column: 22, scope: !7) -!16 = !DILocation(line: 6, column: 20, scope: !7) -!17 = !DILocation(line: 6, column: 3, scope: !7) -!18 = distinct !DISubprogram(name: "caller", linkageName: "caller", scope: !1, file: !1, line: 4, type: !9, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!19 = !DILocation(line: 6, column: 10, scope: !7, inlinedAt: !20) -!20 = distinct !DILocation(line: 6, column: 10, scope: !18) -!21 = !DILocation(line: 6, column: 22, scope: !7, inlinedAt: !20) -!22 = !DILocation(line: 6, column: 20, scope: !7, inlinedAt: !20) -!23 = !DILocation(line: 6, column: 3, scope: !18) - -; CHECK-LABEL: caller: -; CHECK-LABEL: # %bb.0: -; CHECK-NEXT: .loc 1 6 22 prologue_end -; CHECK-NEXT: prefetchnta 23464(%rdi) -; CHECK-NEXT: movl 8(%rdi), %eax -; CHECK-NEXT: .loc 1 6 20 is_stmt 0 discriminator 2 -; CHECK-NEXT: prefetchnta 8764(%rdi) -; CHECK-NEXT: prefetchnta 64(%rdi) -; CHECK-NEXT: addl (%rdi), %eax diff --git a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.afdo b/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.afdo deleted file mode 100644 index 6385a498b8f92..0000000000000 --- a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.afdo +++ /dev/null @@ -1,2 +0,0 @@ -main:0:0 - 6: 0 __prefetch_nta_0:42 \ No newline at end of file diff --git a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll b/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll deleted file mode 100644 index f8e25028cfdee..0000000000000 --- a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll +++ /dev/null @@ -1,41 +0,0 @@ -; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-invalid-instr.afdo | FileCheck %s -; ModuleID = 'prefetch.cc' -source_filename = "prefetch.cc" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: norecurse nounwind uwtable -define dso_local i32 @main() local_unnamed_addr #0 !dbg !7 { -entry: - tail call void @llvm.prefetch(ptr inttoptr (i64 291 to ptr), i32 0, i32 0, i32 1), !dbg !9 - ret i32 291, !dbg !11 -} - -; Function Attrs: inaccessiblemem_or_argmemonly nounwind -declare void @llvm.prefetch(ptr nocapture readonly, i32, i32, i32) #1 - -attributes #0 = {"target-cpu"="x86-64" "target-features"="+sse4.2,+ssse3"} -attributes #1 = { inaccessiblemem_or_argmemonly nounwind } -attributes #2 = { argmemonly nounwind } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) -!1 = !DIFile(filename: "prefetch.cc", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 7.0.0 (trunk 327078) (llvm/trunk 327086)"} -!7 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 8, type: !8, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!8 = !DISubroutineType(types: !2) -!9 = !DILocation(line: 12, column: 3, scope: !7) -!10 = !DILocation(line: 14, column: 3, scope: !7) -!11 = !DILocation(line: 15, column: 3, scope: !7) - -;CHECK-LABEL: main: -;CHECK: # %bb.0: -;CHECK: prefetchnta 291 -;CHECK-NOT: prefetchnta 42(%rax,%ymm0) diff --git a/llvm/test/CodeGen/X86/insert-prefetch-other.afdo b/llvm/test/CodeGen/X86/insert-prefetch-other.afdo deleted file mode 100644 index 783da34f7f84c..0000000000000 --- a/llvm/test/CodeGen/X86/insert-prefetch-other.afdo +++ /dev/null @@ -1,3 +0,0 @@ -sum:0:0 - 1: 0 __prefetch_t0_1:0 __prefetch_t2_0:42 - 1.1: 0 __prefetch_t1_0:18446744073709551615 diff --git a/llvm/test/CodeGen/X86/insert-prefetch.afdo b/llvm/test/CodeGen/X86/insert-prefetch.afdo deleted file mode 100644 index 96487e85eaaf2..0000000000000 --- a/llvm/test/CodeGen/X86/insert-prefetch.afdo +++ /dev/null @@ -1,3 +0,0 @@ -sum:0:0 - 1: 0 __prefetch_nta_1:0 __prefetch_nta_0:42 - 1.1: 0 __prefetch_nta_0:18446744073709551615 diff --git a/llvm/test/CodeGen/X86/insert-prefetch.ll b/llvm/test/CodeGen/X86/insert-prefetch.ll deleted file mode 100644 index 971a6193862d0..0000000000000 --- a/llvm/test/CodeGen/X86/insert-prefetch.ll +++ /dev/null @@ -1,101 +0,0 @@ -; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch.afdo | FileCheck %s -; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-other.afdo | FileCheck %s -check-prefix=OTHERS -; -; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling: -; int sum(int* arr, int pos1, int pos2) { -; return arr[pos1] + arr[pos2]; -; } -; -; NOTE: debug line numbers were adjusted such that the function would start -; at line 15 (an arbitrary number). The sample profile file format uses -; offsets from the start of the symbol instead of file-relative line numbers. -; The .afdo file reflects that - the instructions are offset '1'. -; -; ModuleID = 'test.cc' -source_filename = "test.cc" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define i32 @sum(ptr %arr, i32 %pos1, i32 %pos2) !dbg !35 !prof !37 { -entry: - %idxprom = sext i32 %pos1 to i64, !dbg !38 - %arrayidx = getelementptr inbounds i32, ptr %arr, i64 %idxprom, !dbg !38 - %0 = load i32, ptr %arrayidx, align 4, !dbg !38, !tbaa !39 - %idxprom1 = sext i32 %pos2 to i64, !dbg !43 - %arrayidx2 = getelementptr inbounds i32, ptr %arr, i64 %idxprom1, !dbg !43 - %1 = load i32, ptr %arrayidx2, align 4, !dbg !43, !tbaa !39 - %add = add nsw i32 %1, %0, !dbg !44 - ret i32 %add, !dbg !45 -} - -attributes #0 = { "target-cpu"="x86-64" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5, !6} -!llvm.ident = !{!33} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) -!1 = !DIFile(filename: "test.cc", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{i32 1, !"ProfileSummary", !7} -!7 = !{!8, !9, !10, !11, !12, !13, !14, !15} -!8 = !{!"ProfileFormat", !"SampleProfile"} -!9 = !{!"TotalCount", i64 0} -!10 = !{!"MaxCount", i64 0} -!11 = !{!"MaxInternalCount", i64 0} -!12 = !{!"MaxFunctionCount", i64 0} -!13 = !{!"NumCounts", i64 2} -!14 = !{!"NumFunctions", i64 1} -!15 = !{!"DetailedSummary", !16} -!16 = !{!17, !18, !19, !20, !21, !22, !22, !23, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32} -!17 = !{i32 10000, i64 0, i32 0} -!18 = !{i32 100000, i64 0, i32 0} -!19 = !{i32 200000, i64 0, i32 0} -!20 = !{i32 300000, i64 0, i32 0} -!21 = !{i32 400000, i64 0, i32 0} -!22 = !{i32 500000, i64 0, i32 0} -!23 = !{i32 600000, i64 0, i32 0} -!24 = !{i32 700000, i64 0, i32 0} -!25 = !{i32 800000, i64 0, i32 0} -!26 = !{i32 900000, i64 0, i32 0} -!27 = !{i32 950000, i64 0, i32 0} -!28 = !{i32 990000, i64 0, i32 0} -!29 = !{i32 999000, i64 0, i32 0} -!30 = !{i32 999900, i64 0, i32 0} -!31 = !{i32 999990, i64 0, i32 0} -!32 = !{i32 999999, i64 0, i32 0} -!33 = !{!"clang version 7.0.0 (trunk 322593) (llvm/trunk 322526)"} -!35 = distinct !DISubprogram(name: "sum", linkageName: "sum", scope: !1, file: !1, line: 15, type: !36, isLocal: false, isDefinition: true, scopeLine: 15, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!36 = !DISubroutineType(types: !2) -!37 = !{!"function_entry_count", i64 -1} -!38 = !DILocation(line: 16, column: 10, scope: !35) -!39 = !{!40, !40, i64 0} -!40 = !{!"int", !41, i64 0} -!41 = !{!"omnipotent char", !42, i64 0} -!42 = !{!"Simple C++ TBAA"} -!43 = !DILocation(line: 16, column: 22, scope: !35) -!44 = !DILocation(line: 16, column: 20, scope: !35) -!45 = !DILocation(line: 16, column: 3, scope: !35) - -;CHECK-LABEL: sum: -;CHECK: # %bb.0: -;CHECK: prefetchnta 42(%rdi,%rax,4) -;CHECK-NEXT: prefetchnta (%rdi,%rax,4) -;CHECK-NEXT: movl (%rdi,%rax,4), %eax -;CHECK-NEXT: .loc 1 16 20 discriminator 2 # test.cc:16:20 -;CHECK-NEXT: prefetchnta -1(%rdi,%rcx,4) -;CHECK-NEXT: addl (%rdi,%rcx,4), %eax -;CHECK-NEXT: .loc 1 16 3 # test.cc:16:3 - -;OTHERS-LABEL: sum: -;OTHERS: # %bb.0: -;OTHERS: prefetcht2 42(%rdi,%rax,4) -;OTHERS-NEXT: prefetcht0 (%rdi,%rax,4) -;OTHERS-NEXT: movl (%rdi,%rax,4), %eax -;OTHERS-NEXT: .loc 1 16 20 discriminator 2 # test.cc:16:20 -;OTHERS-NEXT: prefetcht1 -1(%rdi,%rcx,4) -;OTHERS-NEXT: addl (%rdi,%rcx,4), %eax -;OTHERS-NEXT: .loc 1 16 3 # test.cc:16:3 diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index 81390e59d0d0a..276232e27c000 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -208,8 +208,6 @@ ; CHECK-NEXT: X86 Fixup Inst Tuning ; CHECK-NEXT: X86 Fixup Vector Constants ; CHECK-NEXT: Compressing EVEX instrs when possible -; CHECK-NEXT: X86 Discriminate Memory Operands -; CHECK-NEXT: X86 Insert Cache Prefetches ; CHECK-NEXT: X86 insert wait instruction ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: Remove Loads Into Fake Uses