diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index bbfb8a0dbe26a..4e7d6a964a11a 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -519,9 +519,6 @@ namespace llvm { // Expands large div/rem instructions. FunctionPass *createExpandLargeFpConvertPass(); - // This pass expands memcmp() to load/stores. - FunctionPass *createExpandMemCmpLegacyPass(); - /// Creates Break False Dependencies pass. \see BreakFalseDeps.cpp FunctionPass *createBreakFalseDeps(); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index f673bef2fd857..47e1c2c1489c3 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -102,8 +102,7 @@ void initializeEarlyTailDuplicatePass(PassRegistry&); void initializeEdgeBundlesPass(PassRegistry&); void initializeEHContGuardCatchretPass(PassRegistry &); void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry&); -void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&); -void initializeExpandMemCmpLegacyPassPass(PassRegistry &); +void initializeExpandLargeDivRemLegacyPassPass(PassRegistry &); void initializeExpandPostRAPass(PassRegistry&); void initializeExpandReductionsPass(PassRegistry&); void initializeExpandVectorPredicationPass(PassRegistry &); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 729b12dc29e6d..2f84cb199db9b 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -117,8 +117,7 @@ namespace { (void) llvm::createGVNPass(); (void) llvm::createPostDomTree(); (void) llvm::createMergeICmpsLegacyPass(); - (void) llvm::createExpandLargeDivRemPass(); - (void)llvm::createExpandMemCmpLegacyPass(); + (void)llvm::createExpandLargeDivRemPass(); (void) llvm::createExpandVectorPredicationPass(); std::string buf; llvm::raw_string_ostream os(buf); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 80bbfb75185a9..10894d2fde4d5 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -27,7 +27,6 @@ #include "llvm/CodeGen/CodeGenPrepare.h" #include "llvm/CodeGen/DeadMachineInstructionElim.h" #include "llvm/CodeGen/DwarfEHPrepare.h" -#include "llvm/CodeGen/ExpandMemCmp.h" #include "llvm/CodeGen/ExpandReductions.h" #include "llvm/CodeGen/FreeMachineFunction.h" #include "llvm/CodeGen/GCMetadata.h" @@ -590,16 +589,6 @@ void CodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const { addPass(PrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n")); } - if (getOptLevel() != CodeGenOptLevel::None) { - // The MergeICmpsPass tries to create memcmp calls by grouping sequences of - // loads and compares. ExpandMemCmpPass then tries to expand those calls - // into optimally-sized loads and compares. The transforms are enabled by a - // target lowering hook. - if (!Opt.DisableMergeICmps) - addPass(MergeICmpsPass()); - addPass(ExpandMemCmpPass(&TM)); - } - // Run GC lowering passes for builtin collectors // TODO: add a pass insertion point here addPass(GCLoweringPass()); diff --git a/llvm/include/llvm/CodeGen/ExpandMemCmp.h b/llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h similarity index 75% rename from llvm/include/llvm/CodeGen/ExpandMemCmp.h rename to llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h index 94a877854f327..3b5d3cab0d80e 100644 --- a/llvm/include/llvm/CodeGen/ExpandMemCmp.h +++ b/llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_EXPANDMEMCMP_H -#define LLVM_CODEGEN_EXPANDMEMCMP_H +#ifndef LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H +#define LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H #include "llvm/IR/PassManager.h" @@ -16,14 +16,13 @@ namespace llvm { class TargetMachine; class ExpandMemCmpPass : public PassInfoMixin { - const 
TargetMachine *TM; public: - explicit ExpandMemCmpPass(const TargetMachine *TM_) : TM(TM_) {} + explicit ExpandMemCmpPass() {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); }; } // namespace llvm -#endif // LLVM_CODEGEN_EXPANDMEMCMP_H +#endif // LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index d49bcf8a0c8ee..30719aec5624f 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -57,7 +57,6 @@ add_llvm_component_library(LLVMCodeGen ExecutionDomainFix.cpp ExpandLargeDivRem.cpp ExpandLargeFpConvert.cpp - ExpandMemCmp.cpp ExpandPostRAPseudos.cpp ExpandReductions.cpp ExpandVectorPredication.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 418066452c172..f09cc36189dc5 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -41,7 +41,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeEarlyTailDuplicatePass(Registry); initializeExpandLargeDivRemLegacyPassPass(Registry); initializeExpandLargeFpConvertLegacyPassPass(Registry); - initializeExpandMemCmpLegacyPassPass(Registry); initializeExpandPostRAPass(Registry); initializeFEntryInserterPass(Registry); initializeFinalizeISelPass(Registry); diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 2ed39a5696e20..2ee54a67071ed 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -108,9 +108,6 @@ static cl::opt EnableImplicitNullChecks( "enable-implicit-null-checks", cl::desc("Fold null checks into faulting memory operations"), cl::init(false), cl::Hidden); -static cl::opt DisableMergeICmps("disable-mergeicmps", - cl::desc("Disable MergeICmps Pass"), - cl::init(false), cl::Hidden); static cl::opt PrintLSR("print-lsr-output", cl::Hidden, cl::desc("Print LLVM IR produced by the loop-reduce pass")); static cl::opt @@ -487,7 +484,6 @@ 
CGPassBuilderOption llvm::getCGPassBuilderOption() { SET_BOOLEAN_OPTION(EnableImplicitNullChecks) SET_BOOLEAN_OPTION(EnableMachineOutliner) SET_BOOLEAN_OPTION(MISchedPostRA) - SET_BOOLEAN_OPTION(DisableMergeICmps) SET_BOOLEAN_OPTION(DisableLSR) SET_BOOLEAN_OPTION(DisableConstantHoisting) SET_BOOLEAN_OPTION(DisableCGP) @@ -827,14 +823,6 @@ void TargetPassConfig::addIRPasses() { addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n")); } - - // The MergeICmpsPass tries to create memcmp calls by grouping sequences of - // loads and compares. ExpandMemCmpPass then tries to expand those calls - // into optimally-sized loads and compares. The transforms are enabled by a - // target lowering hook. - if (!DisableMergeICmps) - addPass(createMergeICmpsLegacyPass()); - addPass(createExpandMemCmpLegacyPass()); } // Run GC lowering passes for builtin collectors diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index f26d95ab1e479..6266269237cd9 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -80,7 +80,6 @@ #include "llvm/CodeGen/DwarfEHPrepare.h" #include "llvm/CodeGen/ExpandLargeDivRem.h" #include "llvm/CodeGen/ExpandLargeFpConvert.h" -#include "llvm/CodeGen/ExpandMemCmp.h" #include "llvm/CodeGen/FreeMachineFunction.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GlobalMerge.h" @@ -192,6 +191,7 @@ #include "llvm/Transforms/Scalar/DeadStoreElimination.h" #include "llvm/Transforms/Scalar/DivRemPairs.h" #include "llvm/Transforms/Scalar/EarlyCSE.h" +#include "llvm/Transforms/Scalar/ExpandMemCmp.h" #include "llvm/Transforms/Scalar/FlattenCFG.h" #include "llvm/Transforms/Scalar/Float2Int.h" #include "llvm/Transforms/Scalar/GVN.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 142bd50b3798e..bf524af6d460c 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -87,6 +87,7 @@ #include 
"llvm/Transforms/Scalar/DeadStoreElimination.h" #include "llvm/Transforms/Scalar/DivRemPairs.h" #include "llvm/Transforms/Scalar/EarlyCSE.h" +#include "llvm/Transforms/Scalar/ExpandMemCmp.h" #include "llvm/Transforms/Scalar/Float2Int.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/IndVarSimplify.h" @@ -113,6 +114,7 @@ #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" +#include "llvm/Transforms/Scalar/MergeICmps.h" #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" #include "llvm/Transforms/Scalar/NewGVN.h" #include "llvm/Transforms/Scalar/Reassociate.h" @@ -1457,6 +1459,11 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // flattening of blocks. OptimizePM.addPass(DivRemPairsPass()); + // Detect and convert memcmp like idioms to the call then expand them if + // profitable + OptimizePM.addPass(MergeICmpsPass()); + OptimizePM.addPass(ExpandMemCmpPass()); + // Try to annotate calls that were created during optimization. OptimizePM.addPass(TailCallElimPass()); @@ -1979,6 +1986,11 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // flattening of blocks. LateFPM.addPass(DivRemPairsPass()); + // Detect and convert memcmp like idioms to the call then expand them if + // profitable + LateFPM.addPass(MergeICmpsPass()); + LateFPM.addPass(ExpandMemCmpPass()); + // Delete basic blocks, which optimization passes may have killed. 
LateFPM.addPass(SimplifyCFGPass( SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts( diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 093c1f8aaad43..77dc8321e76ed 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -328,7 +328,6 @@ FUNCTION_PASS("dse", DSEPass()) FUNCTION_PASS("dwarf-eh-prepare", DwarfEHPreparePass(TM)) FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass(TM)) FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass(TM)) -FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(TM)) FUNCTION_PASS("fix-irreducible", FixIrreduciblePass()) FUNCTION_PASS("flatten-cfg", FlattenCFGPass()) FUNCTION_PASS("float2int", Float2IntPass()) @@ -374,6 +373,7 @@ FUNCTION_PASS("mem2reg", PromotePass()) FUNCTION_PASS("memcpyopt", MemCpyOptPass()) FUNCTION_PASS("memprof", MemProfilerPass()) FUNCTION_PASS("mergeicmps", MergeICmpsPass()) +FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass()) FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass()) FUNCTION_PASS("move-auto-init", MoveAutoInitPass()) FUNCTION_PASS("nary-reassociate", NaryReassociatePass()) diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt index ba09ebf8b04c4..47cf18737ae20 100644 --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -11,6 +11,7 @@ add_llvm_component_library(LLVMScalarOpts DeadStoreElimination.cpp DFAJumpThreading.cpp DivRemPairs.cpp + ExpandMemCmp.cpp EarlyCSE.cpp FlattenCFGPass.cpp Float2Int.cpp diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp similarity index 89% rename from llvm/lib/CodeGen/ExpandMemCmp.cpp rename to llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp index bb84813569f4d..a8577b5229ed7 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp @@ -11,21 +11,22 @@ // 
//===----------------------------------------------------------------------===// -#include "llvm/CodeGen/ExpandMemCmp.h" +#include "llvm/Transforms/Scalar/ExpandMemCmp.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -35,10 +36,6 @@ using namespace llvm; using namespace llvm::PatternMatch; -namespace llvm { -class TargetLowering; -} - #define DEBUG_TYPE "expand-memcmp" STATISTIC(NumMemCmpCalls, "Number of memcmp calls"); @@ -62,7 +59,6 @@ static cl::opt MaxLoadsPerMemcmpOptSize( namespace { - // This class provides helper functions to expand a memcmp library call into an // inline expansion. class MemCmpExpansion { @@ -92,8 +88,7 @@ class MemCmpExpansion { // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}. struct LoadEntry { LoadEntry(unsigned LoadSize, uint64_t Offset) - : LoadSize(LoadSize), Offset(Offset) { - } + : LoadSize(LoadSize), Offset(Offset) {} // The size of the load for this block, in bytes. 
unsigned LoadSize; @@ -305,6 +300,7 @@ unsigned MemCmpExpansion::getNumBlocks() { } void MemCmpExpansion::createLoadCmpBlocks() { + assert(ResBlock.BB && "ResBlock must be created before LoadCmpBlocks"); for (unsigned i = 0; i < getNumBlocks(); i++) { BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb", EndBlock->getParent(), EndBlock); @@ -313,6 +309,7 @@ void MemCmpExpansion::createLoadCmpBlocks() { } void MemCmpExpansion::createResultBlock() { + assert(EndBlock && "EndBlock must be created before ResultBlock"); ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block", EndBlock->getParent(), EndBlock); } @@ -724,7 +721,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() { // calculate which source was larger. The calculation requires the // two loaded source values of each load compare block. // These will be saved in the phi nodes created by setupResultBlockPHINodes. - if (!IsUsedForZeroCmp) setupResultBlockPHINodes(); + if (!IsUsedForZeroCmp) + setupResultBlockPHINodes(); // Create the number of required load compare basic blocks. createLoadCmpBlocks(); @@ -828,9 +826,9 @@ Value *MemCmpExpansion::getMemCmpExpansion() { /// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ] /// ret i32 %phi.res static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, - const TargetLowering *TLI, const DataLayout *DL, - ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, - DomTreeUpdater *DTU, const bool IsBCmp) { + const DataLayout *DL, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI, DomTreeUpdater *DTU, + const bool IsBCmp) { NumMemCmpCalls++; // Early exit from expansion if -Oz. @@ -845,40 +843,41 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, } const uint64_t SizeVal = SizeCast->getZExtValue(); - if (SizeVal == 0) { - return false; - } // TTI call to check if target would like to expand memcmp. Also, get the // available load sizes. 
const bool IsUsedForZeroCmp = IsBCmp || isOnlyUsedInZeroEqualityComparison(CI); bool OptForSize = CI->getFunction()->hasOptSize() || llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); - auto Options = TTI->enableMemCmpExpansion(OptForSize, - IsUsedForZeroCmp); - if (!Options) return false; - - if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences()) - Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock; + auto Options = TTI->enableMemCmpExpansion(OptForSize, IsUsedForZeroCmp); + if (!Options) + return false; + Value *Res = nullptr; - if (OptForSize && - MaxLoadsPerMemcmpOptSize.getNumOccurrences()) - Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize; + if (SizeVal == 0) { + Res = ConstantInt::get(CI->getFunctionType()->getReturnType(), 0); + } else { + if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences()) + Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock; - if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences()) - Options.MaxNumLoads = MaxLoadsPerMemcmp; + if (OptForSize && MaxLoadsPerMemcmpOptSize.getNumOccurrences()) + Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize; - MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU); + if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences()) + Options.MaxNumLoads = MaxLoadsPerMemcmp; - // Don't expand if this will require more loads than desired by the target. - if (Expansion.getNumLoads() == 0) { - NumMemCmpGreaterThanMax++; - return false; - } + MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU); - NumMemCmpInlined++; + // Don't expand if this will require more loads than desired by the target. + if (Expansion.getNumLoads() == 0) { + NumMemCmpGreaterThanMax++; + return false; + } - if (Value *Res = Expansion.getMemCmpExpansion()) { + NumMemCmpInlined++; + Res = Expansion.getMemCmpExpansion(); + } + if (Res) { // Replace call with result of expansion and erase call. 
CI->replaceAllUsesWith(Res); CI->eraseFromParent(); @@ -889,64 +888,19 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, // Returns true if a change was made. static bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, const TargetLowering *TL, - const DataLayout &DL, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI, DomTreeUpdater *DTU); + const TargetTransformInfo *TTI, const DataLayout &DL, + ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, + DomTreeUpdater *DTU); static PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, - const TargetLowering *TL, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, DominatorTree *DT); -class ExpandMemCmpLegacyPass : public FunctionPass { -public: - static char ID; - - ExpandMemCmpLegacyPass() : FunctionPass(ID) { - initializeExpandMemCmpLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override { - if (skipFunction(F)) return false; - - auto *TPC = getAnalysisIfAvailable(); - if (!TPC) { - return false; - } - const TargetLowering* TL = - TPC->getTM().getSubtargetImpl(F)->getTargetLowering(); - - const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(F); - const TargetTransformInfo *TTI = - &getAnalysis().getTTI(F); - auto *PSI = &getAnalysis().getPSI(); - auto *BFI = (PSI && PSI->hasProfileSummary()) ? 
- &getAnalysis().getBFI() : - nullptr; - DominatorTree *DT = nullptr; - if (auto *DTWP = getAnalysisIfAvailable()) - DT = &DTWP->getDomTree(); - auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT); - return !PA.areAllPreserved(); - } - -private: - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); - LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); - FunctionPass::getAnalysisUsage(AU); - } -}; - bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, const TargetLowering *TL, - const DataLayout &DL, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI, DomTreeUpdater *DTU) { + const TargetTransformInfo *TTI, const DataLayout &DL, + ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, + DomTreeUpdater *DTU) { for (Instruction &I : BB) { CallInst *CI = dyn_cast(&I); if (!CI) { @@ -955,7 +909,7 @@ bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI, LibFunc Func; if (TLI->getLibFunc(*CI, Func) && (Func == LibFunc_memcmp || Func == LibFunc_bcmp) && - expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) { + expandMemCmp(CI, TTI, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) { return true; } } @@ -964,16 +918,25 @@ bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI, PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, - const TargetLowering *TL, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI, DominatorTree *DT) { + ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, + DominatorTree *DT) { + // Sanitizers prefer that calls to memcmp remain as such + // so that they may be intercepted, but since the sanitizer passes run late + // we disable the optimization here.
See + // maybeMarkSanitizerLibraryCallNoBuiltin + if (F.hasFnAttribute(Attribute::SanitizeMemory) || + F.hasFnAttribute(Attribute::SanitizeAddress) || + F.hasFnAttribute(Attribute::SanitizeHWAddress) || + F.hasFnAttribute(Attribute::SanitizeThread)) + return PreservedAnalyses::all(); std::optional DTU; if (DT) DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy); - const DataLayout& DL = F.getParent()->getDataLayout(); + const DataLayout &DL = F.getParent()->getDataLayout(); bool MadeChanges = false; for (auto BBIt = F.begin(); BBIt != F.end();) { - if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) { + if (runOnBlock(*BBIt, TLI, TTI, DL, PSI, BFI, DTU ? &*DTU : nullptr)) { MadeChanges = true; // If changes were made, restart the function from the beginning, since // the structure of the function was changed. @@ -996,7 +959,6 @@ PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, PreservedAnalyses ExpandMemCmpPass::run(Function &F, FunctionAnalysisManager &FAM) { - const auto *TL = TM->getSubtargetImpl(F)->getTargetLowering(); const auto &TLI = FAM.getResult(F); const auto &TTI = FAM.getResult(F); auto *PSI = FAM.getResult(F) @@ -1005,21 +967,5 @@ PreservedAnalyses ExpandMemCmpPass::run(Function &F, ? 
&FAM.getResult(F) : nullptr; auto *DT = FAM.getCachedResult(F); - - return runImpl(F, &TLI, &TTI, TL, PSI, BFI, DT); -} - -char ExpandMemCmpLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(ExpandMemCmpLegacyPass, DEBUG_TYPE, - "Expand memcmp() to load/stores", false, false) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) -INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(ExpandMemCmpLegacyPass, DEBUG_TYPE, - "Expand memcmp() to load/stores", false, false) - -FunctionPass *llvm::createExpandMemCmpLegacyPass() { - return new ExpandMemCmpLegacyPass(); + return runImpl(F, &TLI, &TTI, PSI, BFI, DT); } diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index 638f26298ee26..c96c1edebaf8c 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -43,13 +43,6 @@ ; CHECK-NEXT: Canonicalize Freeze Instructions in Loops ; CHECK-NEXT: Induction Variable Users ; CHECK-NEXT: Loop Strength Reduction -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Merge contiguous icmps into a memcmp -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Expand memcmp() to load/stores ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering ; CHECK-NEXT: Lower constant intrinsics diff --git a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll b/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll deleted file mode 100644 index 4846c46e64817..0000000000000 --- a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll +++ /dev/null @@ -1,98 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: 
llc -O2 < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefix=CHECKN -; RUN: llc -O2 < %s -mtriple=aarch64-linux-gnu -mattr=strict-align | FileCheck %s --check-prefix=CHECKS - -declare i32 @bcmp(ptr, ptr, i64) nounwind readonly -declare i32 @memcmp(ptr, ptr, i64) nounwind readonly - -define i1 @test_b2(ptr %s1, ptr %s2) { -; CHECKN-LABEL: test_b2: -; CHECKN: // %bb.0: // %entry -; CHECKN-NEXT: ldr x8, [x0] -; CHECKN-NEXT: ldr x9, [x1] -; CHECKN-NEXT: ldur x10, [x0, #7] -; CHECKN-NEXT: ldur x11, [x1, #7] -; CHECKN-NEXT: cmp x8, x9 -; CHECKN-NEXT: ccmp x10, x11, #0, eq -; CHECKN-NEXT: cset w0, eq -; CHECKN-NEXT: ret -; -; CHECKS-LABEL: test_b2: -; CHECKS: // %bb.0: // %entry -; CHECKS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECKS-NEXT: .cfi_def_cfa_offset 16 -; CHECKS-NEXT: .cfi_offset w30, -16 -; CHECKS-NEXT: mov w2, #15 // =0xf -; CHECKS-NEXT: bl bcmp -; CHECKS-NEXT: cmp w0, #0 -; CHECKS-NEXT: cset w0, eq -; CHECKS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECKS-NEXT: ret -entry: - %bcmp = call i32 @bcmp(ptr %s1, ptr %s2, i64 15) - %ret = icmp eq i32 %bcmp, 0 - ret i1 %ret -} - -; TODO: Four loads should be within the limit, but the heuristic isn't implemented. -define i1 @test_b2_align8(ptr align 8 %s1, ptr align 8 %s2) { -; CHECKN-LABEL: test_b2_align8: -; CHECKN: // %bb.0: // %entry -; CHECKN-NEXT: ldr x8, [x0] -; CHECKN-NEXT: ldr x9, [x1] -; CHECKN-NEXT: ldur x10, [x0, #7] -; CHECKN-NEXT: ldur x11, [x1, #7] -; CHECKN-NEXT: cmp x8, x9 -; CHECKN-NEXT: ccmp x10, x11, #0, eq -; CHECKN-NEXT: cset w0, eq -; CHECKN-NEXT: ret -; -; CHECKS-LABEL: test_b2_align8: -; CHECKS: // %bb.0: // %entry -; CHECKS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECKS-NEXT: .cfi_def_cfa_offset 16 -; CHECKS-NEXT: .cfi_offset w30, -16 -; CHECKS-NEXT: mov w2, #15 // =0xf -; CHECKS-NEXT: bl bcmp -; CHECKS-NEXT: cmp w0, #0 -; CHECKS-NEXT: cset w0, eq -; CHECKS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECKS-NEXT: ret -entry: - %bcmp = call i32 @bcmp(ptr %s1, ptr %s2, i64 15) - %ret = icmp eq i32 %bcmp, 0 - ret i1 %ret -} - -define i1 @test_bs(ptr %s1, ptr %s2) optsize { -; CHECKN-LABEL: test_bs: -; CHECKN: // %bb.0: // %entry -; CHECKN-NEXT: ldp x8, x11, [x1] -; CHECKN-NEXT: ldr x12, [x0, #16] -; CHECKN-NEXT: ldp x9, x10, [x0] -; CHECKN-NEXT: ldr x13, [x1, #16] -; CHECKN-NEXT: cmp x9, x8 -; CHECKN-NEXT: ldur x8, [x0, #23] -; CHECKN-NEXT: ldur x9, [x1, #23] -; CHECKN-NEXT: ccmp x10, x11, #0, eq -; CHECKN-NEXT: ccmp x12, x13, #0, eq -; CHECKN-NEXT: ccmp x8, x9, #0, eq -; CHECKN-NEXT: cset w0, eq -; CHECKN-NEXT: ret -; -; CHECKS-LABEL: test_bs: -; CHECKS: // %bb.0: // %entry -; CHECKS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECKS-NEXT: .cfi_def_cfa_offset 16 -; CHECKS-NEXT: .cfi_offset w30, -16 -; CHECKS-NEXT: mov w2, #31 // =0x1f -; CHECKS-NEXT: bl memcmp -; CHECKS-NEXT: cmp w0, #0 -; CHECKS-NEXT: cset w0, eq -; CHECKS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECKS-NEXT: ret -entry: - %memcmp = call i32 @memcmp(ptr %s1, ptr %s2, i64 31) - %ret = icmp eq i32 %memcmp, 0 - ret i1 %ret -} diff --git a/llvm/test/CodeGen/AArch64/bcmp.ll b/llvm/test/CodeGen/AArch64/bcmp.ll deleted file mode 100644 index fee52ead98962..0000000000000 --- a/llvm/test/CodeGen/AArch64/bcmp.ll +++ /dev/null @@ -1,537 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O2 < %s -mtriple=aarch64-linux-gnu | FileCheck %s - -declare i32 @bcmp(ptr, ptr, i64) - -define i1 @bcmp0(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp0: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, #1 // =0x1 -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 0) - %r = icmp eq i32 
%cr, 0 - ret i1 %r -} - -define i1 @bcmp1(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp1: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x1] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 1) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp2(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp2: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x1] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 2) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -; or (and (xor a, b), C1), (and (xor c, d), C2) -define i1 @bcmp3(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp3: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x1] -; CHECK-NEXT: ldrb w10, [x0, #2] -; CHECK-NEXT: ldrb w11, [x1, #2] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: ccmp w10, w11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 3) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp4(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp4: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 4) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -; or (xor a, b), (and (xor c, d), C2) -define i1 @bcmp5(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp5: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: ldrb w10, [x0, #4] -; CHECK-NEXT: ldrb w11, [x1, #4] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: ccmp w10, w11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 5) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -; or (xor a, b), (and (xor c, d), C2) -define i1 @bcmp6(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp6: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: 
ldrh w10, [x0, #4] -; CHECK-NEXT: ldrh w11, [x1, #4] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: ccmp w10, w11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 6) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -; or (xor a, b), (xor c, d) -define i1 @bcmp7(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp7: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: ldur w10, [x0, #3] -; CHECK-NEXT: ldur w11, [x1, #3] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: ccmp w10, w11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 7) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp8(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 8) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -; or (xor a, b), (and (xor c, d), C2) -define i1 @bcmp9(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp9: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: ldrb w10, [x0, #8] -; CHECK-NEXT: ldrb w11, [x1, #8] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 9) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp10(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp10: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: ldrh w10, [x0, #8] -; CHECK-NEXT: ldrh w11, [x1, #8] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 10) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp11(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp11: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: ldur x10, [x0, #3] -; 
CHECK-NEXT: ldur x11, [x1, #3] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 11) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp12(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp12: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: ldr w10, [x0, #8] -; CHECK-NEXT: ldr w11, [x1, #8] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 12) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp13(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp13: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: ldur x10, [x0, #5] -; CHECK-NEXT: ldur x11, [x1, #5] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 13) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp14(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp14: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: ldur x10, [x0, #6] -; CHECK-NEXT: ldur x11, [x1, #6] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 14) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp15(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp15: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: ldur x10, [x0, #7] -; CHECK-NEXT: ldur x11, [x1, #7] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 15) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp16(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldp x9, x10, [x0] -; 
CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 16) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp20(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp20: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldr w12, [x0, #16] -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: ldr w13, [x1, #16] -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: ccmp x12, x13, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 20) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp24(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp24: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldr x12, [x0, #16] -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: ldr x13, [x1, #16] -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: ccmp x12, x13, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 24) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp28(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp28: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldr x12, [x0, #16] -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: ldr x13, [x1, #16] -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ldr w8, [x0, #24] -; CHECK-NEXT: ldr w9, [x1, #24] -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: ccmp x12, x13, #0, eq -; CHECK-NEXT: ccmp x8, x9, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 28) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp33(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp33: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: ldp x12, x13, [x1, #16] -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ldp x8, x9, [x0, #16] -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: ldrb w10, [x0, #32] -; CHECK-NEXT: ldrb w11, 
[x1, #32] -; CHECK-NEXT: ccmp x8, x12, #0, eq -; CHECK-NEXT: ccmp x9, x13, #0, eq -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 33) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp38(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp38: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: ldp x12, x13, [x1, #16] -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ldp x8, x9, [x0, #16] -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: ldur x10, [x0, #30] -; CHECK-NEXT: ldur x11, [x1, #30] -; CHECK-NEXT: ccmp x8, x12, #0, eq -; CHECK-NEXT: ccmp x9, x13, #0, eq -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 38) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp45(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp45: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: ldp x12, x13, [x1, #16] -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ldp x8, x9, [x0, #16] -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: ldr x10, [x0, #32] -; CHECK-NEXT: ldr x11, [x1, #32] -; CHECK-NEXT: ccmp x8, x12, #0, eq -; CHECK-NEXT: ldur x8, [x0, #37] -; CHECK-NEXT: ldur x12, [x1, #37] -; CHECK-NEXT: ccmp x9, x13, #0, eq -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: ccmp x8, x12, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 45) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -; Although the large cmp chain may be not profitable on high end CPU, we -; believe it is better on most cpus, so perform the transform now. 
-; 8 xor + 7 or + 1 cmp only need 6 cycles on a 4 width ALU port machine -; 2 cycle for xor -; 3 cycle for or -; 1 cycle for cmp -define i1 @bcmp64(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: ldp x12, x13, [x1, #16] -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ldp x8, x9, [x0, #16] -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: ccmp x8, x12, #0, eq -; CHECK-NEXT: ldp x8, x11, [x0, #32] -; CHECK-NEXT: ldp x10, x12, [x1, #32] -; CHECK-NEXT: ccmp x9, x13, #0, eq -; CHECK-NEXT: ldp x9, x13, [x1, #48] -; CHECK-NEXT: ccmp x8, x10, #0, eq -; CHECK-NEXT: ldp x8, x10, [x0, #48] -; CHECK-NEXT: ccmp x11, x12, #0, eq -; CHECK-NEXT: ccmp x8, x9, #0, eq -; CHECK-NEXT: ccmp x10, x13, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 64) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp89(ptr %a, ptr %b) { -; CHECK-LABEL: bcmp89: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: mov w2, #89 // =0x59 -; CHECK-NEXT: bl bcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %cr = call i32 @bcmp(ptr %a, ptr %b, i64 89) - %r = icmp eq i32 %cr, 0 - ret i1 %r -} - -define i1 @bcmp_zext(i32 %0, i32 %1, i8 %2, i8 %3) { -; CHECK-LABEL: bcmp_zext: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0xff -; CHECK-NEXT: and w9, w3, #0xff -; CHECK-NEXT: cmp w1, w0 -; CHECK-NEXT: ccmp w9, w8, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %5 = xor i32 %1, %0 - %6 = xor i8 %3, %2 - %7 = zext i8 %6 to i32 - %8 = or i32 %5, %7 - %9 = icmp eq i32 %8, 0 - ret i1 %9 -} - -define i1 @bcmp_i8(i8 %a0, i8 %b0, i8 %a1, i8 %b1, i8 %a2, i8 %b2) { -; CHECK-LABEL: bcmp_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xff -; CHECK-NEXT: and w9, w2, #0xff -; CHECK-NEXT: and w10, w3, #0xff -; CHECK-NEXT: cmp w8, w0, uxtb -; CHECK-NEXT: and w8, w4, #0xff -; CHECK-NEXT: and w11, w5, #0xff -; CHECK-NEXT: ccmp w10, w9, #0, eq -; CHECK-NEXT: ccmp w11, w8, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %xor0 = xor i8 %b0, %a0 - %xor1 = xor i8 %b1, %a1 - %xor2 = xor i8 %b2, %a2 - %or0 = or i8 %xor0, %xor1 - %or1 = or i8 %or0, %xor2 - %r = icmp eq i8 %or1, 0 - ret i1 %r -} - -define i1 @bcmp_i16(i16 %a0, i16 %b0, i16 %a1, i16 %b1, i16 %a2, i16 %b2) { -; CHECK-LABEL: bcmp_i16: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: and w9, w2, #0xffff -; CHECK-NEXT: and w10, w3, #0xffff -; CHECK-NEXT: cmp w8, w0, uxth -; CHECK-NEXT: and w8, w4, #0xffff -; CHECK-NEXT: and w11, w5, #0xffff -; CHECK-NEXT: ccmp w10, w9, #0, eq -; CHECK-NEXT: ccmp w11, w8, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %xor0 = xor i16 %b0, %a0 - %xor1 = xor i16 %b1, %a1 - %xor2 = xor i16 %b2, %a2 - %or0 = or i16 %xor0, %xor1 - %or1 = or i16 %or0, %xor2 - %r = icmp eq i16 
%or1, 0 - ret i1 %r -} - -define i1 @bcmp_i128(i128 %a0, i128 %b0, i128 %a1, i128 %b1, i128 %a2, i128 %b2) { -; CHECK-LABEL: bcmp_i128: -; CHECK: // %bb.0: -; CHECK-NEXT: cmp x2, x0 -; CHECK-NEXT: ldp x8, x10, [sp] -; CHECK-NEXT: ccmp x3, x1, #0, eq -; CHECK-NEXT: ldp x9, x11, [sp, #16] -; CHECK-NEXT: ccmp x6, x4, #0, eq -; CHECK-NEXT: ccmp x7, x5, #0, eq -; CHECK-NEXT: cset w12, ne -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ccmp x11, x10, #0, eq -; CHECK-NEXT: csinc w0, w12, wzr, eq -; CHECK-NEXT: ret - %xor0 = xor i128 %b0, %a0 - %xor1 = xor i128 %b1, %a1 - %xor2 = xor i128 %b2, %a2 - %or0 = or i128 %xor0, %xor1 - %or1 = or i128 %or0, %xor2 - %r = icmp ne i128 %or1, 0 - ret i1 %r -} - -define i1 @bcmp_i42(i42 %a0, i42 %b0, i42 %a1, i42 %b1, i42 %a2, i42 %b2) { -; CHECK-LABEL: bcmp_i42: -; CHECK: // %bb.0: -; CHECK-NEXT: and x8, x0, #0x3ffffffffff -; CHECK-NEXT: and x9, x1, #0x3ffffffffff -; CHECK-NEXT: and x10, x2, #0x3ffffffffff -; CHECK-NEXT: and x11, x3, #0x3ffffffffff -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: and x8, x4, #0x3ffffffffff -; CHECK-NEXT: and x9, x5, #0x3ffffffffff -; CHECK-NEXT: ccmp x11, x10, #0, eq -; CHECK-NEXT: ccmp x9, x8, #0, eq -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret - %xor0 = xor i42 %b0, %a0 - %xor1 = xor i42 %b1, %a1 - %xor2 = xor i42 %b2, %a2 - %or0 = or i42 %xor0, %xor1 - %or1 = or i42 %or0, %xor2 - %r = icmp ne i42 %or1, 0 - ret i1 %r -} diff --git a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll index a48a4e0e723eb..f22d3acb75026 100644 --- a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll +++ b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define i1 @combine_setcc_eq_vecreduce_or_v8i1(<8 x i8> %a) { @@ -266,9 
+266,19 @@ define i1 @combine_setcc_eq0_conjunction_xor_or(ptr %a, ptr %b) { ; CHECK-NEXT: ccmp x10, x11, #0, eq ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret - %bcmp = tail call i32 @bcmp(ptr dereferenceable(16) %a, ptr dereferenceable(16) %b, i64 16) - %cmp = icmp eq i32 %bcmp, 0 - ret i1 %cmp + %a.0 = load i64, ptr %a, align 1 + %b.0 = load i64, ptr %b, align 1 + %xor1 = xor i64 %a.0, %b.0 + %1 = getelementptr i8, ptr %a, i64 8 + %2 = getelementptr i8, ptr %b, i64 8 + %a.8 = load i64, ptr %1, align 1 + %b.8 = load i64, ptr %2, align 1 + %xor2 = xor i64 %a.8, %b.8 + %or = or i64 %xor1, %xor2 + %cmp1 = icmp ne i64 %or, 0 + %ext = zext i1 %cmp1 to i32 + %cmp2 = icmp eq i32 %ext, 0 + ret i1 %cmp2 } define i1 @combine_setcc_ne0_conjunction_xor_or(ptr %a, ptr %b) { @@ -280,8 +290,17 @@ define i1 @combine_setcc_ne0_conjunction_xor_or(ptr %a, ptr %b) { ; CHECK-NEXT: ccmp x10, x11, #0, eq ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret - %bcmp = tail call i32 @bcmp(ptr dereferenceable(16) %a, ptr dereferenceable(16) %b, i64 16) - %cmp = icmp ne i32 %bcmp, 0 + %a.0 = load i64, ptr %a, align 1 + %b.0 = load i64, ptr %b, align 1 + %xor1 = xor i64 %a.0, %b.0 + %1 = getelementptr i8, ptr %a, i64 8 + %2 = getelementptr i8, ptr %b, i64 8 + %a.8 = load i64, ptr %1, align 1 + %b.8 = load i64, ptr %2, align 1 + %xor2 = xor i64 %a.8, %b.8 + %or = or i64 %xor1, %xor2 + %cmp = icmp ne i64 %or, 0 + %ext = zext i1 %cmp to i32 ret i1 %cmp } diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll index 30123a31cebbe..4c2188cf340e8 100644 --- a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll +++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll @@ -25,20 +25,23 @@ define i64 @one_dimensional(ptr %a, ptr %b, i64 %N) { entry: br label %for.body -for.body: ; preds = %entry, %for.body +for.body: ; preds = %for.body, %entry %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ] %sum.05 = phi i64 [ %spec.select, %for.body ], [ 0, 
%entry ] %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.06 %0 = load ptr, ptr %arrayidx, align 8 - %bcmp = tail call i32 @bcmp(ptr %0, ptr %b, i64 4) - %tobool = icmp eq i32 %bcmp, 0 + %bcmp_exp = load i32, ptr %0, align 1 + %bcmp_exp2 = load i32, ptr %b, align 1 + %cmp = icmp ne i32 %bcmp_exp, %bcmp_exp2 + %res = zext i1 %cmp to i32 + %tobool = icmp eq i32 %res, 0 %add = zext i1 %tobool to i64 %spec.select = add i64 %sum.05, %add %inc = add nuw i64 %i.06, 1 %exitcond = icmp eq i64 %inc, %N br i1 %exitcond, label %for.exit, label %for.body -for.exit: ; preds = %for.body +for.exit: ; preds = %for.body ret i64 %spec.select } @@ -79,32 +82,35 @@ define i64 @two_dimensional(ptr %a, ptr %b, i64 %N, i64 %M) { entry: br label %for.cond1.preheader -for.cond1.preheader: ; preds = %entry, %for.cond1.for.exit3_crit_edge +for.cond1.preheader: ; preds = %for.cond1.for.exit3_crit_edge, %entry %i.019 = phi i64 [ %inc7, %for.cond1.for.exit3_crit_edge ], [ 0, %entry ] %sum.018 = phi i64 [ %spec.select, %for.cond1.for.exit3_crit_edge ], [ 0, %entry ] %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.019 %0 = load ptr, ptr %arrayidx, align 8 br label %for.body4 -for.body4: ; preds = %for.cond1.preheader, %for.body4 +for.body4: ; preds = %for.body4, %for.cond1.preheader %j.016 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body4 ] %sum.115 = phi i64 [ %sum.018, %for.cond1.preheader ], [ %spec.select, %for.body4 ] %arrayidx5 = getelementptr inbounds ptr, ptr %0, i64 %j.016 %1 = load ptr, ptr %arrayidx5, align 8 - %bcmp = tail call i32 @bcmp(ptr %1, ptr %b, i64 4) - %tobool = icmp eq i32 %bcmp, 0 + %bcmp_exp = load i32, ptr %1, align 1 + %bcmp_exp2 = load i32, ptr %b, align 1 + %cmp = icmp ne i32 %bcmp_exp, %bcmp_exp2 + %res = zext i1 %cmp to i32 + %tobool = icmp eq i32 %res, 0 %add = zext i1 %tobool to i64 %spec.select = add i64 %sum.115, %add %inc = add nuw i64 %j.016, 1 %exitcond = icmp eq i64 %inc, %M br i1 %exitcond, label %for.cond1.for.exit3_crit_edge, label 
%for.body4 -for.cond1.for.exit3_crit_edge: ; preds = %for.body4 +for.cond1.for.exit3_crit_edge: ; preds = %for.body4 %inc7 = add nuw i64 %i.019, 1 %exitcond22 = icmp eq i64 %inc7, %N br i1 %exitcond22, label %for.exit, label %for.cond1.preheader -for.exit: ; preds = %for.cond1.for.exit3_crit_edge +for.exit: ; preds = %for.cond1.for.exit3_crit_edge ret i64 %spec.select } @@ -159,44 +165,47 @@ define i64 @three_dimensional(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) { entry: br label %for.cond1.preheader -for.cond1.preheader: ; preds = %entry, %for.cond1.for.cond +for.cond1.preheader: ; preds = %for.cond1.for.cond, %entry %i.033 = phi i64 [ %inc15, %for.cond1.for.cond ], [ 0, %entry ] %sum.032 = phi i64 [ %spec.select, %for.cond1.for.cond ], [ 0, %entry ] %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.033 %0 = load ptr, ptr %arrayidx, align 8 br label %for.cond5.preheader -for.cond5.preheader: ; preds = %for.cond5.for.cond, %for.cond1.preheader +for.cond5.preheader: ; preds = %for.cond5.for.cond, %for.cond1.preheader %j.029 = phi i64 [ 0, %for.cond1.preheader ], [ %inc12, %for.cond5.for.cond ] %sum.128 = phi i64 [ %sum.032, %for.cond1.preheader ], [ %spec.select, %for.cond5.for.cond ] %arrayidx9 = getelementptr inbounds ptr, ptr %0, i64 %j.029 %1 = load ptr, ptr %arrayidx9, align 8 br label %for.body8 -for.body8: ; preds = %for.body8, %for.cond5.preheader +for.body8: ; preds = %for.body8, %for.cond5.preheader %k.026 = phi i64 [ 0, %for.cond5.preheader ], [ %inc, %for.body8 ] %sum.225 = phi i64 [ %sum.128, %for.cond5.preheader ], [ %spec.select, %for.body8 ] %arrayidx10 = getelementptr inbounds ptr, ptr %1, i64 %k.026 %2 = load ptr, ptr %arrayidx10, align 8 - %bcmp = tail call i32 @bcmp(ptr %2, ptr %b, i64 4) - %tobool = icmp eq i32 %bcmp, 0 + %bcmp_exp = load i32, ptr %2, align 1 + %bcmp_exp2 = load i32, ptr %b, align 1 + %cmp = icmp ne i32 %bcmp_exp, %bcmp_exp2 + %res = zext i1 %cmp to i32 + %tobool = icmp eq i32 %res, 0 %add = zext i1 %tobool to i64 
%spec.select = add i64 %sum.225, %add %inc = add nuw i64 %k.026, 1 %exitcond = icmp eq i64 %inc, %K br i1 %exitcond, label %for.cond5.for.cond, label %for.body8 -for.cond5.for.cond: ; preds = %for.body8 +for.cond5.for.cond: ; preds = %for.body8 %inc12 = add nuw i64 %j.029, 1 %exitcond44 = icmp eq i64 %inc12, %M br i1 %exitcond44, label %for.cond1.for.cond, label %for.cond5.preheader -for.cond1.for.cond: ; preds = %for.cond5.for.cond +for.cond1.for.cond: ; preds = %for.cond5.for.cond %inc15 = add nuw i64 %i.033, 1 %exitcond45 = icmp eq i64 %inc15, %N br i1 %exitcond45, label %for.exit, label %for.cond1.preheader -for.exit: ; preds = %for.cond1.for.cond +for.exit: ; preds = %for.cond1.for.cond ret i64 %spec.select } @@ -254,14 +263,14 @@ define i64 @three_dimensional_middle(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) { entry: br label %for.cond1.preheader -for.cond1.preheader: ; preds = %entry, %for.cond1.for.cond +for.cond1.preheader: ; preds = %for.cond1.for.cond, %entry %i.035 = phi i64 [ %inc16, %for.cond1.for.cond ], [ 0, %entry ] %sum.034 = phi i64 [ %spec.select, %for.cond1.for.cond ], [ 0, %entry ] %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.035 %0 = load ptr, ptr %arrayidx, align 8 br label %for.cond5.preheader -for.cond5.preheader: ; preds = %for.cond5.for.cond, %for.cond1.preheader +for.cond5.preheader: ; preds = %for.cond5.for.cond, %for.cond1.preheader %j.031 = phi i64 [ 0, %for.cond1.preheader ], [ %inc13, %for.cond5.for.cond ] %sum.130 = phi i64 [ %sum.034, %for.cond1.preheader ], [ %spec.select, %for.cond5.for.cond ] %arrayidx9 = getelementptr inbounds ptr, ptr %0, i64 %j.031 @@ -270,30 +279,33 @@ for.cond5.preheader: ; preds = %for.cond5.for.cond, %for.con %2 = load ptr, ptr %arrayidx11, align 8 br label %for.body8 -for.body8: ; preds = %for.body8, %for.cond5.preheader +for.body8: ; preds = %for.body8, %for.cond5.preheader %k.028 = phi i64 [ 0, %for.cond5.preheader ], [ %inc, %for.body8 ] %sum.227 = phi i64 [ %sum.130, %for.cond5.preheader ], 
[ %spec.select, %for.body8 ] %arrayidx10 = getelementptr inbounds ptr, ptr %1, i64 %k.028 %3 = load ptr, ptr %arrayidx10, align 8 - %bcmp = tail call i32 @bcmp(ptr %3, ptr %2, i64 4) - %tobool = icmp eq i32 %bcmp, 0 + %bcmp_exp = load i32, ptr %3, align 1 + %bcmp_exp2 = load i32, ptr %2, align 1 + %cmp = icmp ne i32 %bcmp_exp, %bcmp_exp2 + %res = zext i1 %cmp to i32 + %tobool = icmp eq i32 %res, 0 %add = zext i1 %tobool to i64 %spec.select = add i64 %sum.227, %add %inc = add nuw i64 %k.028, 1 %exitcond = icmp eq i64 %inc, %K br i1 %exitcond, label %for.cond5.for.cond, label %for.body8 -for.cond5.for.cond: ; preds = %for.body8 +for.cond5.for.cond: ; preds = %for.body8 %inc13 = add nuw i64 %j.031, 1 %exitcond46 = icmp eq i64 %inc13, %M br i1 %exitcond46, label %for.cond1.for.cond, label %for.cond5.preheader -for.cond1.for.cond: ; preds = %for.cond5.for.cond +for.cond1.for.cond: ; preds = %for.cond5.for.cond %inc16 = add nuw i64 %i.035, 1 %exitcond47 = icmp eq i64 %inc16, %N br i1 %exitcond47, label %for.exit, label %for.cond1.preheader -for.exit: ; preds = %for.cond1.for.cond +for.exit: ; preds = %for.cond1.for.cond ret i64 %spec.select } @@ -328,19 +340,27 @@ for.body.preheader: ; preds = %entry %wide.trip.count = zext i32 %N to i64 br label %for.body -for.body: ; preds = %for.body.preheader, %for.body +for.body: ; preds = %for.body, %for.body.preheader %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %indvars.iv %0 = load ptr, ptr %arrayidx, align 8 - %call = tail call i32 @memcmp(ptr %0, ptr %b, i64 4) - %conv = trunc i32 %call to i8 + %memcmp_exp = load i32, ptr %0, align 1 + %memcmp_exp2 = load i32, ptr %b, align 1 + %swap = call i32 @llvm.bswap.i32(i32 %memcmp_exp) + %swap2 = call i32 @llvm.bswap.i32(i32 %memcmp_exp2) + %cmp1 = icmp ugt i32 %swap, %swap2 + %cmp2 = icmp ult i32 %swap, %swap2 + %ext1 = zext i1 %cmp1 to i32 + %ext2 = zext i1 %cmp2 to i32 + %res = sub i32 %ext1, 
%ext2 + %conv = trunc i32 %res to i8 %arrayidx2 = getelementptr inbounds i8, ptr %c, i64 %indvars.iv store i8 %conv, ptr %arrayidx2, align 1 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count br i1 %exitcond.not, label %for.exit, label %for.body -for.exit: ; preds = %for.body +for.exit: ; preds = %for.body ret void } @@ -385,13 +405,16 @@ for.body.preheader: ; preds = %entry %wide.trip.count = zext i32 %N to i64 br label %for.body -for.body: ; preds = %for.body.preheader, %for.body +for.body: ; preds = %for.body, %for.body.preheader %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] %sum.05 = phi i32 [ 0, %for.body.preheader ], [ %spec.select, %for.body ] %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %indvars.iv %0 = load ptr, ptr %arrayidx, align 8 - %bcmp = tail call i32 @bcmp(ptr %0, ptr %b, i64 4) - %tobool.not = icmp eq i32 %bcmp, 0 + %bcmp_exp = load i32, ptr %0, align 1 + %bcmp_exp2 = load i32, ptr %b, align 1 + %cmp = icmp ne i32 %bcmp_exp, %bcmp_exp2 + %res = zext i1 %cmp to i32 + %tobool.not = icmp eq i32 %res, 0 %add = zext i1 %tobool.not to i32 %spec.select = add nuw nsw i32 %sum.05, %add tail call void @func() @@ -399,7 +422,7 @@ for.body: ; preds = %for.body.preheader, %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count br i1 %exitcond.not, label %for.exit, label %for.body -for.exit: ; preds = %for.body +for.exit: ; preds = %for.body ret i32 %spec.select } @@ -431,20 +454,32 @@ define i64 @one_dimensional_two_loads(ptr %a, ptr %b, i64 %N) { entry: br label %for.body -for.body: ; preds = %entry, %for.body +for.body: ; preds = %for.body, %entry %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ] %sum.05 = phi i64 [ %spec.select, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.06 %0 = load ptr, ptr %arrayidx, align 8 - %bcmp = tail call i32 @bcmp(ptr %0, ptr %b, i64 6) - %tobool = icmp eq i32 %bcmp, 0 + %bcmp_exp 
= load i32, ptr %0, align 1 + %bcmp_exp2 = load i32, ptr %b, align 1 + %xor1 = xor i32 %bcmp_exp, %bcmp_exp2 + %gep0 = getelementptr i8, ptr %0, i64 4 + %gepb = getelementptr i8, ptr %b, i64 4 + %bcmp_exp3 = load i16, ptr %gep0, align 1 + %bcmp_exp4 = load i16, ptr %gepb, align 1 + %ext = zext i16 %bcmp_exp3 to i32 + %ext2 = zext i16 %bcmp_exp4 to i32 + %xor2 = xor i32 %ext, %ext2 + %or = or i32 %xor1, %xor2 + %cmp = icmp ne i32 %or, 0 + %res = zext i1 %cmp to i32 + %tobool = icmp eq i32 %res, 0 %add = zext i1 %tobool to i64 %spec.select = add i64 %sum.05, %add %inc = add nuw i64 %i.06, 1 %exitcond = icmp eq i64 %inc, %N br i1 %exitcond, label %for.exit, label %for.body -for.exit: ; preds = %for.body +for.exit: ; preds = %for.body ret i64 %spec.select } @@ -475,18 +510,18 @@ define i64 @hoisting_no_cse(ptr %a, ptr %b, ptr %c, i64 %N) { ; CHECK-NEXT: mov x0, x8 ; CHECK-NEXT: ret entry: - %b.val = load i64, ptr %b + %b.val = load i64, ptr %b, align 8 %b.val.changed = add i64 %b.val, 1 - store i64 %b.val.changed, ptr %c + store i64 %b.val.changed, ptr %c, align 8 br label %for.body -for.body: ; preds = %entry, %for.body +for.body: ; preds = %for.body, %entry %idx = phi i64 [ %inc, %for.body ], [ 0, %entry ] %sum = phi i64 [ %spec.select, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %idx %0 = load ptr, ptr %arrayidx, align 8 - %x = load i64, ptr %0 - %y = load i64, ptr %b + %x = load i64, ptr %0, align 8 + %y = load i64, ptr %b, align 8 %cmp = icmp eq i64 %x, %y %add = zext i1 %cmp to i64 %spec.select = add i64 %sum, %add @@ -494,10 +529,15 @@ for.body: ; preds = %entry, %for.body %exitcond = icmp eq i64 %inc, %N br i1 %exitcond, label %for.exit, label %for.body -for.exit: ; preds = %for.body +for.exit: ; preds = %for.body ret i64 %spec.select } declare i32 @bcmp(ptr, ptr, i64) declare i32 @memcmp(ptr, ptr, i64) declare void @func() + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 
@llvm.bswap.i32(i32) #0 + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/llvm/test/CodeGen/AArch64/memcmp.ll b/llvm/test/CodeGen/AArch64/memcmp.ll deleted file mode 100644 index 4da7c8c95a4e4..0000000000000 --- a/llvm/test/CodeGen/AArch64/memcmp.ll +++ /dev/null @@ -1,3029 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s - -@.str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1 - -declare dso_local i32 @memcmp(ptr, ptr, i64) - -define i32 @length0(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length0: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind - ret i32 %m - } - -define i1 @length0_eq(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length0_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, #1 // =0x1 -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length0_lt(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length0_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i32 @length2(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length2: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x1] -; 
CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: lsr w8, w8, #16 -; CHECK-NEXT: sub w0, w8, w9, lsr #16 -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - ret i32 %m -} - -define i32 @length2_const(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length2_const: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: mov w8, #-12594 // =0xffffcece -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: add w0, w8, w9, lsr #16 -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind - ret i32 %m -} - -define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length2_gt_const: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: mov w8, #-12594 // =0xffffcece -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: add w8, w8, w9, lsr #16 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind - %c = icmp sgt i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length2_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x1] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length2_lt(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length2_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x1] -; CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: lsr w8, w8, #16 -; CHECK-NEXT: sub w8, w8, w9, lsr #16 -; CHECK-NEXT: lsr w0, w8, #31 -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i1 @length2_gt(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length2_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; 
CHECK-NEXT: ldrh w9, [x1] -; CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: lsr w8, w8, #16 -; CHECK-NEXT: sub w8, w8, w9, lsr #16 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - %c = icmp sgt i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length2_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: mov w9, #12849 // =0x3231 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length2_eq_nobuiltin_attr: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #2 // =0x2 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length3(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length3: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0, #2] -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: ldrb w10, [x1, #2] -; CHECK-NEXT: ldrh w11, [x1] -; CHECK-NEXT: orr w8, w9, w8, lsl #16 -; CHECK-NEXT: orr w9, w11, w10, lsl #16 -; CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w8, hi -; CHECK-NEXT: cset w9, lo -; CHECK-NEXT: sub w0, w8, w9 -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind - ret i32 %m -} - -define i1 @length3_eq(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length3_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x1] -; CHECK-NEXT: ldrb w10, [x0, #2] -; CHECK-NEXT: ldrb w11, [x1, 
#2] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: ccmp w10, w11, #0, eq -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length4(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length4: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w8, hi -; CHECK-NEXT: cset w9, lo -; CHECK-NEXT: sub w0, w8, w9 -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - ret i32 %m -} - -define i1 @length4_eq(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length4_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length4_lt(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length4_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w0, lo -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i32 @length4_lt_32(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length4_lt_32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w0, lo -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - %c = lshr i32 %m, 31 - ret i32 %c -} - -define i1 @length4_gt(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length4_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w0, hi -; CHECK-NEXT: 
ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - %c = icmp sgt i32 %m, 0 - ret i1 %c -} - -define i1 @length4_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length4_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: mov w9, #12849 // =0x3231 -; CHECK-NEXT: movk w9, #13363, lsl #16 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length5(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length5: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0, #4] -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: ldrb w10, [x1, #4] -; CHECK-NEXT: ldr w11, [x1] -; CHECK-NEXT: orr x8, x9, x8, lsl #32 -; CHECK-NEXT: orr x9, x11, x10, lsl #32 -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: cset w8, hi -; CHECK-NEXT: cset w9, lo -; CHECK-NEXT: sub w0, w8, w9 -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind - ret i32 %m -} - -define i1 @length5_eq(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length5_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: ldrb w10, [x0, #4] -; CHECK-NEXT: ldrb w11, [x1, #4] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: ccmp w10, w11, #0, eq -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length5_lt(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length5_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0, #4] -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: ldrb w10, [x1, #4] -; CHECK-NEXT: ldr w11, [x1] -; CHECK-NEXT: orr x8, x9, x8, lsl #32 -; CHECK-NEXT: orr x9, x11, x10, lsl #32 -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: cset w0, lo -; CHECK-NEXT: ret - %m = tail call i32 
@memcmp(ptr %X, ptr %Y, i64 5) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i32 @length6(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length6: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0, #4] -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: ldrh w10, [x1, #4] -; CHECK-NEXT: ldr w11, [x1] -; CHECK-NEXT: orr x8, x9, x8, lsl #32 -; CHECK-NEXT: orr x9, x11, x10, lsl #32 -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: cset w8, hi -; CHECK-NEXT: cset w9, lo -; CHECK-NEXT: sub w0, w8, w9 -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind - ret i32 %m -} - -define i32 @length6_lt(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length6_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0, #4] -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: ldrh w10, [x1, #4] -; CHECK-NEXT: ldr w11, [x1] -; CHECK-NEXT: orr x8, x9, x8, lsl #32 -; CHECK-NEXT: orr x9, x11, x10, lsl #32 -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: cset w0, lo -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind - %r = lshr i32 %m, 31 - ret i32 %r -} - -define i32 @length7(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length7: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB24_3 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldur w8, [x0, #3] -; CHECK-NEXT: ldur w9, [x1, #3] -; CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB24_3 -; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB24_3: // %res_block -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w0, w8, hs -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind - ret i32 %m -} - -define i1 @length7_lt(ptr %X, ptr %Y) nounwind { -; 
CHECK-LABEL: length7_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB25_3 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldur w8, [x0, #3] -; CHECK-NEXT: ldur w9, [x1, #3] -; CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB25_3 -; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: lsr w0, wzr, #31 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB25_3: // %res_block -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: lsr w0, w8, #31 -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i1 @length7_eq(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length7_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: ldur w10, [x0, #3] -; CHECK-NEXT: ldur w11, [x1, #3] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: ccmp w10, w11, #0, eq -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length8(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: cset w8, hi -; CHECK-NEXT: cset w9, lo -; CHECK-NEXT: sub w0, w8, w9 -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind - ret i32 %m -} - -define i1 @length8_eq(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length8_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length8_eq_const(ptr %X) 
nounwind { -; CHECK-LABEL: length8_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x9, #12592 // =0x3130 -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: movk x9, #13106, lsl #16 -; CHECK-NEXT: movk x9, #13620, lsl #32 -; CHECK-NEXT: movk x9, #14134, lsl #48 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length9(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length9: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB30_2 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldrb w8, [x0, #8] -; CHECK-NEXT: ldrb w9, [x1, #8] -; CHECK-NEXT: sub w0, w8, w9 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB30_2: // %res_block -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w0, w8, hs -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind - ret i32 %m -} - -define i1 @length9_eq(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length9_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: ldrb w10, [x0, #8] -; CHECK-NEXT: ldrb w11, [x1, #8] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length10(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length10: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB32_3 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldrh w8, [x0, #8] -; CHECK-NEXT: ldrh w9, [x1, #8] -; CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: lsr w8, w8, #16 -; CHECK-NEXT: lsr w9, w9, #16 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: 
b.ne .LBB32_3 -; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB32_3: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w0, w8, hs -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind - ret i32 %m -} - -define i1 @length10_eq(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length10_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: ldrh w10, [x0, #8] -; CHECK-NEXT: ldrh w11, [x1, #8] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length11(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length11: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB34_3 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldur x8, [x0, #3] -; CHECK-NEXT: ldur x9, [x1, #3] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB34_3 -; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB34_3: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w0, w8, hs -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind - ret i32 %m -} - -define i1 @length11_eq(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length11_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: ldur x10, [x0, #3] -; CHECK-NEXT: ldur x11, [x1, #3] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length12_eq(ptr 
%X, ptr %Y) nounwind { -; CHECK-LABEL: length12_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: ldr w10, [x0, #8] -; CHECK-NEXT: ldr w11, [x1, #8] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length12(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length12: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB37_3 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr w8, [x0, #8] -; CHECK-NEXT: ldr w9, [x1, #8] -; CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB37_3 -; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB37_3: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w0, w8, hs -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind - ret i32 %m -} - -define i1 @length13_eq(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length13_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: ldur x10, [x0, #5] -; CHECK-NEXT: ldur x11, [x1, #5] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 13) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length14_eq(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length14_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: ldur x10, [x0, #6] -; CHECK-NEXT: ldur x11, [x1, #6] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 
14) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length15(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length15: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB40_3 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldur x8, [x0, #7] -; CHECK-NEXT: ldur x9, [x1, #7] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB40_3 -; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB40_3: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w0, w8, hs -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind - ret i32 %m -} - -define i1 @length15_lt(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length15_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB41_3 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldur x8, [x0, #7] -; CHECK-NEXT: ldur x9, [x1, #7] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB41_3 -; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: lsr w0, wzr, #31 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB41_3: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: lsr w0, w8, #31 -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i32 @length15_const(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length15_const: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #14136 // =0x3738 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: movk x8, #13622, lsl #16 -; CHECK-NEXT: movk x8, #13108, lsl #32 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: movk x8, #12594, lsl #48 -; 
CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: b.ne .LBB42_3 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: mov x8, #13365 // =0x3435 -; CHECK-NEXT: ldur x9, [x0, #7] -; CHECK-NEXT: movk x8, #12851, lsl #16 -; CHECK-NEXT: movk x8, #12337, lsl #32 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: movk x8, #14393, lsl #48 -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: b.ne .LBB42_3 -; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB42_3: // %res_block -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w0, w8, hs -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind - ret i32 %m -} - -define i1 @length15_eq(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length15_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: ldur x10, [x0, #7] -; CHECK-NEXT: ldur x11, [x1, #7] -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length15_gt_const: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #14136 // =0x3738 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: movk x8, #13622, lsl #16 -; CHECK-NEXT: movk x8, #13108, lsl #32 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: movk x8, #12594, lsl #48 -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: b.ne .LBB44_3 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: mov x8, #13365 // =0x3435 -; CHECK-NEXT: ldur x9, [x0, #7] -; CHECK-NEXT: movk x8, #12851, lsl #16 -; CHECK-NEXT: movk x8, #12337, lsl #32 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: movk x8, #14393, lsl #48 -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: b.ne .LBB44_3 -; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: b .LBB44_4 -; CHECK-NEXT: .LBB44_3: // %res_block -; CHECK-NEXT: cmp 
x9, x8 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: .LBB44_4: // %endblock -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind - %c = icmp sgt i32 %m, 0 - ret i1 %c -} - - -define i32 @length16(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB45_3 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB45_3 -; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB45_3: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w0, w8, hs -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind - ret i32 %m -} - -define i1 @length16_eq(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length16_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_lt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length16_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB47_3 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB47_3 -; 
CHECK-NEXT: // %bb.2: -; CHECK-NEXT: lsr w0, wzr, #31 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB47_3: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: lsr w0, w8, #31 -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_gt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length16_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB48_3 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB48_3 -; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: b .LBB48_4 -; CHECK-NEXT: .LBB48_3: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: .LBB48_4: // %endblock -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length16_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #12592 // =0x3130 -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: movk x8, #13106, lsl #16 -; CHECK-NEXT: movk x8, #13620, lsl #32 -; CHECK-NEXT: movk x8, #14134, lsl #48 -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: mov x8, #14648 // =0x3938 -; CHECK-NEXT: movk x8, #12592, lsl #16 -; CHECK-NEXT: movk x8, #13106, lsl #32 -; CHECK-NEXT: movk x8, #13620, lsl #48 -; CHECK-NEXT: ccmp x10, x8, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - - -define i32 @length24(ptr %X, ptr 
%Y) nounwind { -; CHECK-LABEL: length24: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB50_4 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB50_4 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB50_4 -; CHECK-NEXT: // %bb.3: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB50_4: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w0, w8, hs -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind - ret i32 %m -} - -define i1 @length24_eq(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length24_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldr x12, [x0, #16] -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: ldr x13, [x1, #16] -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: ccmp x12, x13, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_lt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length24_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB52_4 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB52_4 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: 
ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB52_4 -; CHECK-NEXT: // %bb.3: -; CHECK-NEXT: lsr w0, wzr, #31 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB52_4: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: lsr w0, w8, #31 -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_gt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length24_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB53_4 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB53_4 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB53_4 -; CHECK-NEXT: // %bb.3: -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: b .LBB53_5 -; CHECK-NEXT: .LBB53_4: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: .LBB53_5: // %endblock -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length24_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #12592 // =0x3130 -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: movk x8, #13106, lsl #16 -; CHECK-NEXT: ldr x11, [x0, #16] -; CHECK-NEXT: movk x8, #13620, lsl #32 -; CHECK-NEXT: movk x8, #14134, lsl #48 -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: 
mov x8, #14648 // =0x3938 -; CHECK-NEXT: movk x8, #12592, lsl #16 -; CHECK-NEXT: movk x8, #13106, lsl #32 -; CHECK-NEXT: movk x8, #13620, lsl #48 -; CHECK-NEXT: ccmp x10, x8, #0, eq -; CHECK-NEXT: mov x8, #14134 // =0x3736 -; CHECK-NEXT: movk x8, #14648, lsl #16 -; CHECK-NEXT: movk x8, #12592, lsl #32 -; CHECK-NEXT: movk x8, #13106, lsl #48 -; CHECK-NEXT: ccmp x11, x8, #0, eq -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length31(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length31: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB55_5 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB55_5 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB55_5 -; CHECK-NEXT: // %bb.3: // %loadbb3 -; CHECK-NEXT: ldur x8, [x0, #23] -; CHECK-NEXT: ldur x9, [x1, #23] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB55_5 -; CHECK-NEXT: // %bb.4: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB55_5: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w0, w8, hs -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 31) nounwind - ret i32 %m -} - -define i1 @length31_eq(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length31_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldr x12, [x0, #16] -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: ldr x13, [x1, #16] -; CHECK-NEXT: cmp x9, x8 -; 
CHECK-NEXT: ldur x8, [x0, #23] -; CHECK-NEXT: ldur x9, [x1, #23] -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: ccmp x12, x13, #0, eq -; CHECK-NEXT: ccmp x8, x9, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_lt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length31_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB57_5 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB57_5 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB57_5 -; CHECK-NEXT: // %bb.3: // %loadbb3 -; CHECK-NEXT: ldur x8, [x0, #23] -; CHECK-NEXT: ldur x9, [x1, #23] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB57_5 -; CHECK-NEXT: // %bb.4: -; CHECK-NEXT: lsr w0, wzr, #31 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB57_5: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: lsr w0, w8, #31 -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_gt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length31_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB58_5 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; 
CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB58_5 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB58_5 -; CHECK-NEXT: // %bb.3: // %loadbb3 -; CHECK-NEXT: ldur x8, [x0, #23] -; CHECK-NEXT: ldur x9, [x1, #23] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB58_5 -; CHECK-NEXT: // %bb.4: -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: b .LBB58_6 -; CHECK-NEXT: .LBB58_5: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: .LBB58_6: // %endblock -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { -; CHECK-LABEL: length31_eq_prefer128: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldr x12, [x0, #16] -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: ldr x13, [x1, #16] -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ldur x8, [x0, #23] -; CHECK-NEXT: ldur x9, [x1, #23] -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: ccmp x12, x13, #0, eq -; CHECK-NEXT: ccmp x8, x9, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length31_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #12592 // =0x3130 -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: movk x8, #13106, lsl #16 -; CHECK-NEXT: ldr x11, [x0, #16] -; CHECK-NEXT: movk x8, #13620, lsl #32 -; CHECK-NEXT: movk x8, #14134, lsl #48 -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: mov x8, #14648 // =0x3938 -; CHECK-NEXT: 
ldur x9, [x0, #23] -; CHECK-NEXT: movk x8, #12592, lsl #16 -; CHECK-NEXT: movk x8, #13106, lsl #32 -; CHECK-NEXT: movk x8, #13620, lsl #48 -; CHECK-NEXT: ccmp x10, x8, #0, eq -; CHECK-NEXT: mov x8, #14134 // =0x3736 -; CHECK-NEXT: movk x8, #14648, lsl #16 -; CHECK-NEXT: movk x8, #12592, lsl #32 -; CHECK-NEXT: movk x8, #13106, lsl #48 -; CHECK-NEXT: ccmp x11, x8, #0, eq -; CHECK-NEXT: mov x8, #13363 // =0x3433 -; CHECK-NEXT: movk x8, #13877, lsl #16 -; CHECK-NEXT: movk x8, #14391, lsl #32 -; CHECK-NEXT: movk x8, #12345, lsl #48 -; CHECK-NEXT: ccmp x9, x8, #0, eq -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 31) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length32(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB61_5 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB61_5 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB61_5 -; CHECK-NEXT: // %bb.3: // %loadbb3 -; CHECK-NEXT: ldr x8, [x0, #24] -; CHECK-NEXT: ldr x9, [x1, #24] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB61_5 -; CHECK-NEXT: // %bb.4: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB61_5: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w0, w8, hs -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind - ret i32 %m -} - - -define i1 @length32_eq(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length32_eq: -; 
CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: ldp x12, x13, [x1, #16] -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ldp x8, x9, [x0, #16] -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: ccmp x8, x12, #0, eq -; CHECK-NEXT: ccmp x9, x13, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_lt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length32_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB63_5 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB63_5 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB63_5 -; CHECK-NEXT: // %bb.3: // %loadbb3 -; CHECK-NEXT: ldr x8, [x0, #24] -; CHECK-NEXT: ldr x9, [x1, #24] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB63_5 -; CHECK-NEXT: // %bb.4: -; CHECK-NEXT: lsr w0, wzr, #31 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB63_5: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: lsr w0, w8, #31 -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_gt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length32_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB64_5 -; 
CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB64_5 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB64_5 -; CHECK-NEXT: // %bb.3: // %loadbb3 -; CHECK-NEXT: ldr x8, [x0, #24] -; CHECK-NEXT: ldr x9, [x1, #24] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB64_5 -; CHECK-NEXT: // %bb.4: -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: b .LBB64_6 -; CHECK-NEXT: .LBB64_5: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: .LBB64_6: // %endblock -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { -; CHECK-LABEL: length32_eq_prefer128: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: ldp x12, x13, [x1, #16] -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ldp x8, x9, [x0, #16] -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: ccmp x8, x12, #0, eq -; CHECK-NEXT: ccmp x9, x13, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length32_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #12592 // =0x3130 -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: movk x8, #13106, lsl #16 -; CHECK-NEXT: movk x8, #13620, lsl #32 -; CHECK-NEXT: movk x8, #14134, lsl #48 -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: mov x8, 
#14648 // =0x3938 -; CHECK-NEXT: movk x8, #12592, lsl #16 -; CHECK-NEXT: ldp x9, x11, [x0, #16] -; CHECK-NEXT: movk x8, #13106, lsl #32 -; CHECK-NEXT: movk x8, #13620, lsl #48 -; CHECK-NEXT: ccmp x10, x8, #0, eq -; CHECK-NEXT: mov x8, #14134 // =0x3736 -; CHECK-NEXT: movk x8, #14648, lsl #16 -; CHECK-NEXT: movk x8, #12592, lsl #32 -; CHECK-NEXT: movk x8, #13106, lsl #48 -; CHECK-NEXT: ccmp x9, x8, #0, eq -; CHECK-NEXT: mov x8, #13620 // =0x3534 -; CHECK-NEXT: movk x8, #14134, lsl #16 -; CHECK-NEXT: movk x8, #14648, lsl #32 -; CHECK-NEXT: movk x8, #12592, lsl #48 -; CHECK-NEXT: ccmp x11, x8, #0, eq -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length48(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length48: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB67_7 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB67_7 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB67_7 -; CHECK-NEXT: // %bb.3: // %loadbb3 -; CHECK-NEXT: ldr x8, [x0, #24] -; CHECK-NEXT: ldr x9, [x1, #24] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB67_7 -; CHECK-NEXT: // %bb.4: // %loadbb4 -; CHECK-NEXT: ldr x8, [x0, #32] -; CHECK-NEXT: ldr x9, [x1, #32] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB67_7 -; CHECK-NEXT: // %bb.5: // %loadbb5 -; CHECK-NEXT: ldr x8, [x0, #40] -; CHECK-NEXT: ldr x9, [x1, #40] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: 
rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB67_7 -; CHECK-NEXT: // %bb.6: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB67_7: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w0, w8, hs -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 48) nounwind - ret i32 %m -} - -define i1 @length48_eq(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length48_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: ldp x12, x13, [x1, #16] -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ldp x8, x9, [x0, #16] -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: ccmp x8, x12, #0, eq -; CHECK-NEXT: ldp x8, x11, [x0, #32] -; CHECK-NEXT: ldp x10, x12, [x1, #32] -; CHECK-NEXT: ccmp x9, x13, #0, eq -; CHECK-NEXT: ccmp x8, x10, #0, eq -; CHECK-NEXT: ccmp x11, x12, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_lt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length48_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB69_7 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB69_7 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB69_7 -; CHECK-NEXT: // %bb.3: // %loadbb3 -; CHECK-NEXT: ldr x8, [x0, #24] -; CHECK-NEXT: ldr x9, [x1, #24] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB69_7 -; CHECK-NEXT: // %bb.4: // %loadbb4 -; 
CHECK-NEXT: ldr x8, [x0, #32] -; CHECK-NEXT: ldr x9, [x1, #32] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB69_7 -; CHECK-NEXT: // %bb.5: // %loadbb5 -; CHECK-NEXT: ldr x8, [x0, #40] -; CHECK-NEXT: ldr x9, [x1, #40] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB69_7 -; CHECK-NEXT: // %bb.6: -; CHECK-NEXT: lsr w0, wzr, #31 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB69_7: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: lsr w0, w8, #31 -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_gt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length48_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB70_7 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB70_7 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB70_7 -; CHECK-NEXT: // %bb.3: // %loadbb3 -; CHECK-NEXT: ldr x8, [x0, #24] -; CHECK-NEXT: ldr x9, [x1, #24] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB70_7 -; CHECK-NEXT: // %bb.4: // %loadbb4 -; CHECK-NEXT: ldr x8, [x0, #32] -; CHECK-NEXT: ldr x9, [x1, #32] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB70_7 -; CHECK-NEXT: // %bb.5: // %loadbb5 -; CHECK-NEXT: ldr x8, [x0, #40] -; CHECK-NEXT: ldr x9, [x1, #40] -; CHECK-NEXT: rev x8, x8 -; 
CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB70_7 -; CHECK-NEXT: // %bb.6: -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: b .LBB70_8 -; CHECK-NEXT: .LBB70_7: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: .LBB70_8: // %endblock -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { -; CHECK-LABEL: length48_eq_prefer128: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: ldp x12, x13, [x1, #16] -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ldp x8, x9, [x0, #16] -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: ccmp x8, x12, #0, eq -; CHECK-NEXT: ldp x8, x11, [x0, #32] -; CHECK-NEXT: ldp x10, x12, [x1, #32] -; CHECK-NEXT: ccmp x9, x13, #0, eq -; CHECK-NEXT: ccmp x8, x10, #0, eq -; CHECK-NEXT: ccmp x11, x12, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length48_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #12592 // =0x3130 -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: movk x8, #13106, lsl #16 -; CHECK-NEXT: ldp x11, x12, [x0, #16] -; CHECK-NEXT: movk x8, #13620, lsl #32 -; CHECK-NEXT: movk x8, #14134, lsl #48 -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: mov x9, #14648 // =0x3938 -; CHECK-NEXT: movk x9, #12592, lsl #16 -; CHECK-NEXT: movk x9, #13106, lsl #32 -; CHECK-NEXT: movk x9, #13620, lsl #48 -; CHECK-NEXT: ccmp x10, x9, #0, eq -; CHECK-NEXT: mov x9, #14134 // =0x3736 -; CHECK-NEXT: movk x9, #14648, lsl #16 -; CHECK-NEXT: movk x9, #12592, lsl #32 -; CHECK-NEXT: movk x9, #13106, lsl #48 -; CHECK-NEXT: ccmp x11, 
x9, #0, eq -; CHECK-NEXT: mov x9, #13620 // =0x3534 -; CHECK-NEXT: movk x9, #14134, lsl #16 -; CHECK-NEXT: ldp x10, x11, [x0, #32] -; CHECK-NEXT: movk x9, #14648, lsl #32 -; CHECK-NEXT: movk x9, #12592, lsl #48 -; CHECK-NEXT: ccmp x12, x9, #0, eq -; CHECK-NEXT: mov x9, #13106 // =0x3332 -; CHECK-NEXT: movk x9, #13620, lsl #16 -; CHECK-NEXT: movk x9, #14134, lsl #32 -; CHECK-NEXT: movk x9, #14648, lsl #48 -; CHECK-NEXT: ccmp x10, x9, #0, eq -; CHECK-NEXT: ccmp x11, x8, #0, eq -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 48) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length63(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length63: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB73_9 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB73_9 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB73_9 -; CHECK-NEXT: // %bb.3: // %loadbb3 -; CHECK-NEXT: ldr x8, [x0, #24] -; CHECK-NEXT: ldr x9, [x1, #24] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB73_9 -; CHECK-NEXT: // %bb.4: // %loadbb4 -; CHECK-NEXT: ldr x8, [x0, #32] -; CHECK-NEXT: ldr x9, [x1, #32] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB73_9 -; CHECK-NEXT: // %bb.5: // %loadbb5 -; CHECK-NEXT: ldr x8, [x0, #40] -; CHECK-NEXT: ldr x9, [x1, #40] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB73_9 -; CHECK-NEXT: // %bb.6: // %loadbb6 -; CHECK-NEXT: ldr x8, 
[x0, #48] -; CHECK-NEXT: ldr x9, [x1, #48] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB73_9 -; CHECK-NEXT: // %bb.7: // %loadbb7 -; CHECK-NEXT: ldur x8, [x0, #55] -; CHECK-NEXT: ldur x9, [x1, #55] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB73_9 -; CHECK-NEXT: // %bb.8: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB73_9: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w0, w8, hs -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 63) nounwind - ret i32 %m -} - -define i1 @length63_eq(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length63_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: ldp x12, x13, [x1, #16] -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ldp x8, x9, [x0, #16] -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: ccmp x8, x12, #0, eq -; CHECK-NEXT: ldp x8, x11, [x0, #32] -; CHECK-NEXT: ldp x10, x12, [x1, #32] -; CHECK-NEXT: ccmp x9, x13, #0, eq -; CHECK-NEXT: ldr x9, [x0, #48] -; CHECK-NEXT: ldr x13, [x1, #48] -; CHECK-NEXT: ccmp x8, x10, #0, eq -; CHECK-NEXT: ldur x8, [x0, #55] -; CHECK-NEXT: ldur x10, [x1, #55] -; CHECK-NEXT: ccmp x11, x12, #0, eq -; CHECK-NEXT: ccmp x9, x13, #0, eq -; CHECK-NEXT: ccmp x8, x10, #0, eq -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length63_lt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length63_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB75_9 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp 
x8, x9 -; CHECK-NEXT: b.ne .LBB75_9 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB75_9 -; CHECK-NEXT: // %bb.3: // %loadbb3 -; CHECK-NEXT: ldr x8, [x0, #24] -; CHECK-NEXT: ldr x9, [x1, #24] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB75_9 -; CHECK-NEXT: // %bb.4: // %loadbb4 -; CHECK-NEXT: ldr x8, [x0, #32] -; CHECK-NEXT: ldr x9, [x1, #32] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB75_9 -; CHECK-NEXT: // %bb.5: // %loadbb5 -; CHECK-NEXT: ldr x8, [x0, #40] -; CHECK-NEXT: ldr x9, [x1, #40] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB75_9 -; CHECK-NEXT: // %bb.6: // %loadbb6 -; CHECK-NEXT: ldr x8, [x0, #48] -; CHECK-NEXT: ldr x9, [x1, #48] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB75_9 -; CHECK-NEXT: // %bb.7: // %loadbb7 -; CHECK-NEXT: ldur x8, [x0, #55] -; CHECK-NEXT: ldur x9, [x1, #55] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB75_9 -; CHECK-NEXT: // %bb.8: -; CHECK-NEXT: lsr w0, wzr, #31 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB75_9: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: lsr w0, w8, #31 -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length63_gt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length63_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB76_9 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, 
[x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB76_9 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB76_9 -; CHECK-NEXT: // %bb.3: // %loadbb3 -; CHECK-NEXT: ldr x8, [x0, #24] -; CHECK-NEXT: ldr x9, [x1, #24] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB76_9 -; CHECK-NEXT: // %bb.4: // %loadbb4 -; CHECK-NEXT: ldr x8, [x0, #32] -; CHECK-NEXT: ldr x9, [x1, #32] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB76_9 -; CHECK-NEXT: // %bb.5: // %loadbb5 -; CHECK-NEXT: ldr x8, [x0, #40] -; CHECK-NEXT: ldr x9, [x1, #40] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB76_9 -; CHECK-NEXT: // %bb.6: // %loadbb6 -; CHECK-NEXT: ldr x8, [x0, #48] -; CHECK-NEXT: ldr x9, [x1, #48] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB76_9 -; CHECK-NEXT: // %bb.7: // %loadbb7 -; CHECK-NEXT: ldur x8, [x0, #55] -; CHECK-NEXT: ldur x9, [x1, #55] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB76_9 -; CHECK-NEXT: // %bb.8: -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: b .LBB76_10 -; CHECK-NEXT: .LBB76_9: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: .LBB76_10: // %endblock -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length63_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length63_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #12592 // =0x3130 
-; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: movk x8, #13106, lsl #16 -; CHECK-NEXT: ldp x11, x12, [x0, #16] -; CHECK-NEXT: movk x8, #13620, lsl #32 -; CHECK-NEXT: movk x8, #14134, lsl #48 -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: mov x9, #14648 // =0x3938 -; CHECK-NEXT: movk x9, #12592, lsl #16 -; CHECK-NEXT: movk x9, #13106, lsl #32 -; CHECK-NEXT: movk x9, #13620, lsl #48 -; CHECK-NEXT: ccmp x10, x9, #0, eq -; CHECK-NEXT: mov x10, #14134 // =0x3736 -; CHECK-NEXT: movk x10, #14648, lsl #16 -; CHECK-NEXT: movk x10, #12592, lsl #32 -; CHECK-NEXT: movk x10, #13106, lsl #48 -; CHECK-NEXT: ccmp x11, x10, #0, eq -; CHECK-NEXT: mov x10, #13620 // =0x3534 -; CHECK-NEXT: movk x10, #14134, lsl #16 -; CHECK-NEXT: ldp x11, x13, [x0, #32] -; CHECK-NEXT: movk x10, #14648, lsl #32 -; CHECK-NEXT: movk x10, #12592, lsl #48 -; CHECK-NEXT: ccmp x12, x10, #0, eq -; CHECK-NEXT: mov x10, #13106 // =0x3332 -; CHECK-NEXT: ldr x12, [x0, #48] -; CHECK-NEXT: movk x10, #13620, lsl #16 -; CHECK-NEXT: movk x10, #14134, lsl #32 -; CHECK-NEXT: movk x10, #14648, lsl #48 -; CHECK-NEXT: ccmp x11, x10, #0, eq -; CHECK-NEXT: ldur x10, [x0, #55] -; CHECK-NEXT: ccmp x13, x8, #0, eq -; CHECK-NEXT: mov x8, #13877 // =0x3635 -; CHECK-NEXT: movk x8, #14391, lsl #16 -; CHECK-NEXT: ccmp x12, x9, #0, eq -; CHECK-NEXT: movk x8, #12345, lsl #32 -; CHECK-NEXT: movk x8, #12849, lsl #48 -; CHECK-NEXT: ccmp x10, x8, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 63) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length64(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB78_9 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; 
CHECK-NEXT: b.ne .LBB78_9 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB78_9 -; CHECK-NEXT: // %bb.3: // %loadbb3 -; CHECK-NEXT: ldr x8, [x0, #24] -; CHECK-NEXT: ldr x9, [x1, #24] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB78_9 -; CHECK-NEXT: // %bb.4: // %loadbb4 -; CHECK-NEXT: ldr x8, [x0, #32] -; CHECK-NEXT: ldr x9, [x1, #32] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB78_9 -; CHECK-NEXT: // %bb.5: // %loadbb5 -; CHECK-NEXT: ldr x8, [x0, #40] -; CHECK-NEXT: ldr x9, [x1, #40] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB78_9 -; CHECK-NEXT: // %bb.6: // %loadbb6 -; CHECK-NEXT: ldr x8, [x0, #48] -; CHECK-NEXT: ldr x9, [x1, #48] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB78_9 -; CHECK-NEXT: // %bb.7: // %loadbb7 -; CHECK-NEXT: ldr x8, [x0, #56] -; CHECK-NEXT: ldr x9, [x1, #56] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB78_9 -; CHECK-NEXT: // %bb.8: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB78_9: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w0, w8, hs -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind - ret i32 %m -} - -define i1 @length64_eq(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length64_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x11, [x1] -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: ldp x12, x13, [x1, #16] -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: ldp x8, x9, [x0, #16] -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: ccmp x8, x12, #0, eq -; CHECK-NEXT: ldp x8, x11, [x0, #32] -; CHECK-NEXT: ldp x10, 
x12, [x1, #32] -; CHECK-NEXT: ccmp x9, x13, #0, eq -; CHECK-NEXT: ldp x9, x13, [x1, #48] -; CHECK-NEXT: ccmp x8, x10, #0, eq -; CHECK-NEXT: ldp x8, x10, [x0, #48] -; CHECK-NEXT: ccmp x11, x12, #0, eq -; CHECK-NEXT: ccmp x8, x9, #0, eq -; CHECK-NEXT: ccmp x10, x13, #0, eq -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_lt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length64_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB80_9 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB80_9 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB80_9 -; CHECK-NEXT: // %bb.3: // %loadbb3 -; CHECK-NEXT: ldr x8, [x0, #24] -; CHECK-NEXT: ldr x9, [x1, #24] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB80_9 -; CHECK-NEXT: // %bb.4: // %loadbb4 -; CHECK-NEXT: ldr x8, [x0, #32] -; CHECK-NEXT: ldr x9, [x1, #32] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB80_9 -; CHECK-NEXT: // %bb.5: // %loadbb5 -; CHECK-NEXT: ldr x8, [x0, #40] -; CHECK-NEXT: ldr x9, [x1, #40] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB80_9 -; CHECK-NEXT: // %bb.6: // %loadbb6 -; CHECK-NEXT: ldr x8, [x0, #48] -; CHECK-NEXT: ldr x9, [x1, #48] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB80_9 -; CHECK-NEXT: // %bb.7: // %loadbb7 -; 
CHECK-NEXT: ldr x8, [x0, #56] -; CHECK-NEXT: ldr x9, [x1, #56] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB80_9 -; CHECK-NEXT: // %bb.8: -; CHECK-NEXT: lsr w0, wzr, #31 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB80_9: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: lsr w0, w8, #31 -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_gt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length64_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB81_9 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr x9, [x1, #8] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB81_9 -; CHECK-NEXT: // %bb.2: // %loadbb2 -; CHECK-NEXT: ldr x8, [x0, #16] -; CHECK-NEXT: ldr x9, [x1, #16] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB81_9 -; CHECK-NEXT: // %bb.3: // %loadbb3 -; CHECK-NEXT: ldr x8, [x0, #24] -; CHECK-NEXT: ldr x9, [x1, #24] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB81_9 -; CHECK-NEXT: // %bb.4: // %loadbb4 -; CHECK-NEXT: ldr x8, [x0, #32] -; CHECK-NEXT: ldr x9, [x1, #32] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB81_9 -; CHECK-NEXT: // %bb.5: // %loadbb5 -; CHECK-NEXT: ldr x8, [x0, #40] -; CHECK-NEXT: ldr x9, [x1, #40] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB81_9 -; CHECK-NEXT: // %bb.6: // %loadbb6 -; CHECK-NEXT: ldr x8, [x0, #48] -; CHECK-NEXT: ldr x9, [x1, #48] -; CHECK-NEXT: rev x8, x8 -; 
CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB81_9 -; CHECK-NEXT: // %bb.7: // %loadbb7 -; CHECK-NEXT: ldr x8, [x0, #56] -; CHECK-NEXT: ldr x9, [x1, #56] -; CHECK-NEXT: rev x8, x8 -; CHECK-NEXT: rev x9, x9 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.ne .LBB81_9 -; CHECK-NEXT: // %bb.8: -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: b .LBB81_10 -; CHECK-NEXT: .LBB81_9: // %res_block -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: .LBB81_10: // %endblock -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length64_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #12592 // =0x3130 -; CHECK-NEXT: ldp x9, x10, [x0] -; CHECK-NEXT: movk x8, #13106, lsl #16 -; CHECK-NEXT: ldp x11, x12, [x0, #16] -; CHECK-NEXT: movk x8, #13620, lsl #32 -; CHECK-NEXT: ldp x13, x14, [x0, #32] -; CHECK-NEXT: movk x8, #14134, lsl #48 -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: mov x9, #14648 // =0x3938 -; CHECK-NEXT: movk x9, #12592, lsl #16 -; CHECK-NEXT: movk x9, #13106, lsl #32 -; CHECK-NEXT: movk x9, #13620, lsl #48 -; CHECK-NEXT: ccmp x10, x9, #0, eq -; CHECK-NEXT: mov x10, #14134 // =0x3736 -; CHECK-NEXT: movk x10, #14648, lsl #16 -; CHECK-NEXT: movk x10, #12592, lsl #32 -; CHECK-NEXT: movk x10, #13106, lsl #48 -; CHECK-NEXT: ccmp x11, x10, #0, eq -; CHECK-NEXT: mov x11, #13620 // =0x3534 -; CHECK-NEXT: movk x11, #14134, lsl #16 -; CHECK-NEXT: movk x11, #14648, lsl #32 -; CHECK-NEXT: movk x11, #12592, lsl #48 -; CHECK-NEXT: ccmp x12, x11, #0, eq -; CHECK-NEXT: mov x11, #13106 // =0x3332 -; CHECK-NEXT: movk x11, #13620, lsl #16 -; CHECK-NEXT: movk x11, #14134, lsl #32 -; CHECK-NEXT: movk x11, #14648, lsl #48 -; CHECK-NEXT: ccmp x13, x11, #0, eq -; CHECK-NEXT: ldp x11, x12, [x0, #48] -; CHECK-NEXT: ccmp x14, 
x8, #0, eq -; CHECK-NEXT: ccmp x11, x9, #0, eq -; CHECK-NEXT: ccmp x12, x10, #0, eq -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length96(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length96: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w2, #96 // =0x60 -; CHECK-NEXT: b memcmp - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 96) nounwind - ret i32 %m -} - -define i1 @length96_eq(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length96_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #96 // =0x60 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length96_lt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length96_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #96 // =0x60 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: lsr w0, w0, #31 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length96_gt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length96_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #96 // =0x60 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length96_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length96_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: adrp x1, .L.str -; CHECK-NEXT: add x1, x1, :lo12:.L.str -; CHECK-NEXT: mov w2, #96 // =0x60 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 96) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length127(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length127: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w2, #127 // =0x7f -; CHECK-NEXT: b memcmp - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 127) nounwind - ret i32 %m -} - -define i1 @length127_eq(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length127_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #127 // =0x7f -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length127_lt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length127_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #127 // =0x7f -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: lsr w0, w0, #31 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length127_gt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length127_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: mov w2, #127 // =0x7f -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length127_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length127_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: adrp x1, .L.str -; CHECK-NEXT: add x1, x1, :lo12:.L.str -; CHECK-NEXT: mov w2, #127 // =0x7f -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 127) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length128(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length128: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w2, #128 // =0x80 -; CHECK-NEXT: b memcmp - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 128) nounwind - ret i32 %m -} - -define i1 @length128_eq(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length128_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #128 // =0x80 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length128_lt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length128_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: mov w2, #128 // =0x80 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: lsr w0, w0, #31 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length128_gt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length128_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #128 // =0x80 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length128_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length128_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: adrp x1, .L.str -; CHECK-NEXT: add x1, x1, :lo12:.L.str -; CHECK-NEXT: mov w2, #128 // =0x80 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 128) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length192(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length192: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w2, #192 // =0xc0 -; CHECK-NEXT: b memcmp - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 192) nounwind - ret i32 %m -} - -define i1 @length192_eq(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length192_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: mov w2, #192 // =0xc0 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length192_lt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length192_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #192 // =0xc0 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: lsr w0, w0, #31 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length192_gt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length192_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #192 // =0xc0 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length192_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length192_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: adrp x1, .L.str -; CHECK-NEXT: add x1, x1, :lo12:.L.str -; CHECK-NEXT: mov w2, #192 // =0xc0 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 192) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length255(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length255: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w2, #255 // =0xff -; CHECK-NEXT: b memcmp - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 255) nounwind - ret i32 %m -} - -define i1 @length255_eq(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length255_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #255 // =0xff -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length255_lt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length255_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #255 // =0xff -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: lsr w0, w0, #31 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length255_gt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length255_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: mov w2, #255 // =0xff -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length255_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length255_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: adrp x1, .L.str -; CHECK-NEXT: add x1, x1, :lo12:.L.str -; CHECK-NEXT: mov w2, #255 // =0xff -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 255) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length256(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length256: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w2, #256 // =0x100 -; CHECK-NEXT: b memcmp - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 256) nounwind - ret i32 %m -} - -define i1 @length256_eq(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length256_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #256 // =0x100 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length256_lt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length256_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: mov w2, #256 // =0x100 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: lsr w0, w0, #31 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length256_gt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length256_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #256 // =0x100 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length256_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length256_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: adrp x1, .L.str -; CHECK-NEXT: add x1, x1, :lo12:.L.str -; CHECK-NEXT: mov w2, #256 // =0x100 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 256) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length384(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length384: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w2, #384 // =0x180 -; CHECK-NEXT: b memcmp - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 384) nounwind - ret i32 %m -} - -define i1 @length384_eq(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length384_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: mov w2, #384 // =0x180 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length384_lt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length384_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #384 // =0x180 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: lsr w0, w0, #31 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length384_gt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length384_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #384 // =0x180 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length384_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length384_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: adrp x1, .L.str -; CHECK-NEXT: add x1, x1, :lo12:.L.str -; CHECK-NEXT: mov w2, #384 // =0x180 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 384) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length511(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length511: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w2, #511 // =0x1ff -; CHECK-NEXT: b memcmp - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 511) nounwind - ret i32 %m -} - -define i1 @length511_eq(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length511_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #511 // =0x1ff -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length511_lt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length511_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #511 // =0x1ff -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: lsr w0, w0, #31 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length511_gt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length511_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: mov w2, #511 // =0x1ff -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length511_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length511_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: adrp x1, .L.str -; CHECK-NEXT: add x1, x1, :lo12:.L.str -; CHECK-NEXT: mov w2, #511 // =0x1ff -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 511) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length512(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length512: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w2, #512 // =0x200 -; CHECK-NEXT: b memcmp - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 512) nounwind - ret i32 %m -} - -define i1 @length512_eq(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length512_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #512 // =0x200 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length512_lt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length512_lt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: mov w2, #512 // =0x200 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: lsr w0, w0, #31 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length512_gt(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: length512_gt: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w2, #512 // =0x200 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, gt -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length512_eq_const(ptr %X) nounwind { -; CHECK-LABEL: length512_eq_const: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: adrp x1, .L.str -; CHECK-NEXT: add x1, x1, :lo12:.L.str -; CHECK-NEXT: mov w2, #512 // =0x200 -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 512) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @huge_length(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: huge_length: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x2, #9223372036854775807 // =0x7fffffffffffffff -; CHECK-NEXT: b memcmp - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind - ret i32 %m -} - -define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: huge_length_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: mov x2, #9223372036854775807 // =0x7fffffffffffffff -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) nounwind { -; CHECK-LABEL: nonconst_length: -; CHECK: // %bb.0: -; CHECK-NEXT: b memcmp - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind - ret i32 %m -} - -define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) nounwind { -; CHECK-LABEL: nonconst_length_eq: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 48f00a82e3e1c..77436ccbd2319 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -203,13 +203,6 @@ ; GCN-O1-NEXT: Canonicalize Freeze Instructions in Loops ; GCN-O1-NEXT: Induction Variable Users ; GCN-O1-NEXT: Loop Strength Reduction -; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) -; GCN-O1-NEXT: Function Alias Analysis Results -; GCN-O1-NEXT: Merge contiguous icmps into a memcmp -; GCN-O1-NEXT: Natural Loop Information -; GCN-O1-NEXT: Lazy Branch Probability Analysis -; GCN-O1-NEXT: Lazy Block Frequency Analysis -; GCN-O1-NEXT: Expand memcmp() to load/stores ; GCN-O1-NEXT: Lower constant intrinsics ; GCN-O1-NEXT: Remove unreachable blocks from the CFG ; GCN-O1-NEXT: Natural Loop Information @@ -485,13 +478,6 @@ ; GCN-O1-OPTS-NEXT: Canonicalize Freeze Instructions in Loops ; 
GCN-O1-OPTS-NEXT: Induction Variable Users ; GCN-O1-OPTS-NEXT: Loop Strength Reduction -; GCN-O1-OPTS-NEXT: Basic Alias Analysis (stateless AA impl) -; GCN-O1-OPTS-NEXT: Function Alias Analysis Results -; GCN-O1-OPTS-NEXT: Merge contiguous icmps into a memcmp -; GCN-O1-OPTS-NEXT: Natural Loop Information -; GCN-O1-OPTS-NEXT: Lazy Branch Probability Analysis -; GCN-O1-OPTS-NEXT: Lazy Block Frequency Analysis -; GCN-O1-OPTS-NEXT: Expand memcmp() to load/stores ; GCN-O1-OPTS-NEXT: Lower constant intrinsics ; GCN-O1-OPTS-NEXT: Remove unreachable blocks from the CFG ; GCN-O1-OPTS-NEXT: Natural Loop Information @@ -786,13 +772,6 @@ ; GCN-O2-NEXT: Canonicalize Freeze Instructions in Loops ; GCN-O2-NEXT: Induction Variable Users ; GCN-O2-NEXT: Loop Strength Reduction -; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) -; GCN-O2-NEXT: Function Alias Analysis Results -; GCN-O2-NEXT: Merge contiguous icmps into a memcmp -; GCN-O2-NEXT: Natural Loop Information -; GCN-O2-NEXT: Lazy Branch Probability Analysis -; GCN-O2-NEXT: Lazy Block Frequency Analysis -; GCN-O2-NEXT: Expand memcmp() to load/stores ; GCN-O2-NEXT: Lower constant intrinsics ; GCN-O2-NEXT: Remove unreachable blocks from the CFG ; GCN-O2-NEXT: Natural Loop Information @@ -1095,13 +1074,6 @@ ; GCN-O3-NEXT: Canonicalize Freeze Instructions in Loops ; GCN-O3-NEXT: Induction Variable Users ; GCN-O3-NEXT: Loop Strength Reduction -; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) -; GCN-O3-NEXT: Function Alias Analysis Results -; GCN-O3-NEXT: Merge contiguous icmps into a memcmp -; GCN-O3-NEXT: Natural Loop Information -; GCN-O3-NEXT: Lazy Branch Probability Analysis -; GCN-O3-NEXT: Lazy Block Frequency Analysis -; GCN-O3-NEXT: Expand memcmp() to load/stores ; GCN-O3-NEXT: Lower constant intrinsics ; GCN-O3-NEXT: Remove unreachable blocks from the CFG ; GCN-O3-NEXT: Natural Loop Information diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index 
5e565970fc3a8..f2bef2c7e46ac 100644 --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -21,13 +21,6 @@ ; CHECK-NEXT: Canonicalize Freeze Instructions in Loops ; CHECK-NEXT: Induction Variable Users ; CHECK-NEXT: Loop Strength Reduction -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Merge contiguous icmps into a memcmp -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Expand memcmp() to load/stores ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering ; CHECK-NEXT: Lower constant intrinsics diff --git a/llvm/test/CodeGen/BPF/memcmp.ll b/llvm/test/CodeGen/BPF/memcmp.ll deleted file mode 100644 index 7ed8dc1e736f4..0000000000000 --- a/llvm/test/CodeGen/BPF/memcmp.ll +++ /dev/null @@ -1,77 +0,0 @@ -; RUN: llc -march=bpfel < %s | FileCheck %s -; RUN: llc -march=bpfel -mcpu=v3 < %s | FileCheck %s -; -; Source code: -; /* set aligned 4 to minimize the number of loads */ -; struct build_id { -; unsigned char id[20]; -; } __attribute__((aligned(4))); -; -; /* try to compute a local build_id */ -; void bar1(ptr); -; -; /* the global build_id to compare */ -; struct build_id id2; -; -; int foo() -; { -; struct build_id id1; -; -; bar1(&id1); -; return __builtin_memcmp(&id1, &id2, sizeof(id1)) == 0; -; } -; Compilation flags: -; clang -target bpf -S -O2 t.c -emit-llvm - - -%struct.build_id = type { [20 x i8] } - -@id2 = dso_local global %struct.build_id zeroinitializer, align 4 - -; Function Attrs: nounwind -define dso_local i32 @foo() local_unnamed_addr #0 { -entry: - %id11 = alloca [20 x i8], align 4 - call void @llvm.lifetime.start.p0(i64 20, ptr nonnull %id11) #4 - call void @bar1(ptr noundef nonnull %id11) #4 - %call = call i32 @memcmp(ptr noundef nonnull dereferenceable(20) %id11, ptr noundef nonnull dereferenceable(20) @id2, i64 
noundef 20) #4 - %cmp = icmp eq i32 %call, 0 - %conv = zext i1 %cmp to i32 - call void @llvm.lifetime.end.p0(i64 20, ptr nonnull %id11) #4 - ret i32 %conv -} - -; CHECK-DAG: *(u32 *)(r1 + 0) -; CHECK-DAG: *(u32 *)(r1 + 4) -; CHECK-DAG: *(u32 *)(r10 - 16) -; CHECK-DAG: *(u32 *)(r10 - 20) -; CHECK-DAG: *(u32 *)(r10 - 8) -; CHECK-DAG: *(u32 *)(r10 - 12) -; CHECK-DAG: *(u32 *)(r1 + 8) -; CHECK-DAG: *(u32 *)(r1 + 12) -; CHECK-DAG: *(u32 *)(r2 + 16) -; CHECK-DAG: *(u32 *)(r10 - 4) - -; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn -declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 - -declare dso_local void @bar1(ptr noundef) local_unnamed_addr #2 - -; Function Attrs: argmemonly mustprogress nofree nounwind readonly willreturn -declare dso_local i32 @memcmp(ptr nocapture noundef, ptr nocapture noundef, i64 noundef) local_unnamed_addr #3 - -; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn -declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 - -attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn } -attributes #2 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #3 = { argmemonly mustprogress nofree nounwind readonly willreturn "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #4 = { nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"frame-pointer", i32 2} -!2 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project.git dea65874b2505f8f5e8e51fd8cad6908feb375ec)"} diff --git a/llvm/test/CodeGen/Generic/llc-start-stop.ll b/llvm/test/CodeGen/Generic/llc-start-stop.ll index b02472473a00c..9ada245835981 100644 --- a/llvm/test/CodeGen/Generic/llc-start-stop.ll +++ 
b/llvm/test/CodeGen/Generic/llc-start-stop.ll @@ -19,15 +19,15 @@ ; STOP-BEFORE-NOT: Loop Strength Reduction ; RUN: llc < %s -debug-pass=Structure -start-after=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START-AFTER -; START-AFTER: -aa -mergeicmps +; START-AFTER: -gc-lowering ; START-AFTER: FunctionPass Manager -; START-AFTER-NEXT: Dominator Tree Construction +; START-AFTER-NEXT: Lower Garbage Collection Instructions ; RUN: llc < %s -debug-pass=Structure -start-before=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START-BEFORE ; START-BEFORE: -machine-branch-prob -regalloc-evict -regalloc-priority -domtree ; START-BEFORE: FunctionPass Manager ; START-BEFORE: Loop Strength Reduction -; START-BEFORE-NEXT: Basic Alias Analysis (stateless AA impl) +; START-BEFORE-NEXT: Lower Garbage Collection Instructions ; RUN: not --crash llc < %s -start-before=nonexistent -o /dev/null 2>&1 | FileCheck %s -check-prefix=NONEXISTENT-START-BEFORE ; RUN: not --crash llc < %s -stop-before=nonexistent -o /dev/null 2>&1 | FileCheck %s -check-prefix=NONEXISTENT-STOP-BEFORE diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll index 3134d940545e8..696d8c8be017c 100644 --- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll @@ -23,8 +23,8 @@ ; CHECK-NEXT: Type-Based Alias Analysis ; CHECK-NEXT: Scoped NoAlias Alias Analysis ; CHECK-NEXT: Assumption Cache Tracker -; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Create Garbage Collector Module Metadata +; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default Regalloc Eviction Advisor ; CHECK-NEXT: Default Regalloc Priority Advisor @@ -44,13 +44,6 @@ ; CHECK-NEXT: Canonicalize Freeze Instructions in Loops ; CHECK-NEXT: Induction Variable Users ; CHECK-NEXT: Loop Strength Reduction -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis 
Results -; CHECK-NEXT: Merge contiguous icmps into a memcmp -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Expand memcmp() to load/stores ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering ; CHECK-NEXT: Lower constant intrinsics diff --git a/llvm/test/CodeGen/M68k/pipeline.ll b/llvm/test/CodeGen/M68k/pipeline.ll index dfaa149b7a474..ad053cf4d61a0 100644 --- a/llvm/test/CodeGen/M68k/pipeline.ll +++ b/llvm/test/CodeGen/M68k/pipeline.ll @@ -15,13 +15,6 @@ ; CHECK-NEXT: Canonicalize Freeze Instructions in Loops ; CHECK-NEXT: Induction Variable Users ; CHECK-NEXT: Loop Strength Reduction -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Merge contiguous icmps into a memcmp -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Expand memcmp() to load/stores ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering ; CHECK-NEXT: Lower constant intrinsics diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll index 6ce4416211cc4..1fdb4802eff03 100644 --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -11,8 +11,8 @@ ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Type-Based Alias Analysis ; CHECK-NEXT: Scoped NoAlias Alias Analysis -; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Create Garbage Collector Module Metadata +; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default Regalloc Eviction Advisor ; CHECK-NEXT: Default Regalloc Priority Advisor @@ -45,13 +45,6 @@ ; CHECK-NEXT: Canonicalize Freeze Instructions in Loops ; CHECK-NEXT: Induction Variable Users ; CHECK-NEXT: Loop Strength Reduction -; 
CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Merge contiguous icmps into a memcmp -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Expand memcmp() to load/stores ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering ; CHECK-NEXT: Lower constant intrinsics diff --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll deleted file mode 100644 index 1da40d46aa773..0000000000000 --- a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll +++ /dev/null @@ -1,168 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -ppc-gpr-icmps=all -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s -target datalayout = "e-m:e-i64:64-n32:64" -target triple = "powerpc64le-unknown-linux-gnu" - -@zeroEqualityTest01.buffer1 = private unnamed_addr constant [3 x i32] [i32 1, i32 2, i32 4], align 4 -@zeroEqualityTest01.buffer2 = private unnamed_addr constant [3 x i32] [i32 1, i32 2, i32 3], align 4 -@zeroEqualityTest02.buffer1 = private unnamed_addr constant [4 x i32] [i32 4, i32 0, i32 0, i32 0], align 4 -@zeroEqualityTest02.buffer2 = private unnamed_addr constant [4 x i32] [i32 3, i32 0, i32 0, i32 0], align 4 -@zeroEqualityTest03.buffer1 = private unnamed_addr constant [4 x i32] [i32 0, i32 0, i32 0, i32 3], align 4 -@zeroEqualityTest03.buffer2 = private unnamed_addr constant [4 x i32] [i32 0, i32 0, i32 0, i32 4], align 4 -@zeroEqualityTest04.buffer1 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14], align 4 -@zeroEqualityTest04.buffer2 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, 
i32 10, i32 11, i32 12, i32 13, i32 13], align 4 - -declare signext i32 @memcmp(ptr nocapture, ptr nocapture, i64) local_unnamed_addr #1 - -; Check 4 bytes - requires 1 load for each param. -define signext i32 @zeroEqualityTest02(ptr %x, ptr %y) { -; CHECK-LABEL: zeroEqualityTest02: -; CHECK: # %bb.0: -; CHECK-NEXT: lwz 3, 0(3) -; CHECK-NEXT: lwz 4, 0(4) -; CHECK-NEXT: xor 3, 3, 4 -; CHECK-NEXT: cntlzw 3, 3 -; CHECK-NEXT: srwi 3, 3, 5 -; CHECK-NEXT: xori 3, 3, 1 -; CHECK-NEXT: blr - %call = tail call signext i32 @memcmp(ptr %x, ptr %y, i64 4) - %not.cmp = icmp ne i32 %call, 0 - %. = zext i1 %not.cmp to i32 - ret i32 %. -} - -; Check 16 bytes - requires 2 loads for each param (or use vectors?). -define signext i32 @zeroEqualityTest01(ptr %x, ptr %y) { -; CHECK-LABEL: zeroEqualityTest01: -; CHECK: # %bb.0: -; CHECK-NEXT: ld 5, 0(3) -; CHECK-NEXT: ld 6, 0(4) -; CHECK-NEXT: cmpld 5, 6 -; CHECK-NEXT: bne 0, .LBB1_2 -; CHECK-NEXT: # %bb.1: # %loadbb1 -; CHECK-NEXT: ld 5, 8(3) -; CHECK-NEXT: ld 4, 8(4) -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: cmpld 5, 4 -; CHECK-NEXT: beqlr 0 -; CHECK-NEXT: .LBB1_2: # %res_block -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: blr - %call = tail call signext i32 @memcmp(ptr %x, ptr %y, i64 16) - %not.tobool = icmp ne i32 %call, 0 - %. = zext i1 %not.tobool to i32 - ret i32 %. -} - -; Check 7 bytes - requires 3 loads for each param. 
-define signext i32 @zeroEqualityTest03(ptr %x, ptr %y) { -; CHECK-LABEL: zeroEqualityTest03: -; CHECK: # %bb.0: -; CHECK-NEXT: lwz 5, 0(3) -; CHECK-NEXT: lwz 6, 0(4) -; CHECK-NEXT: cmplw 5, 6 -; CHECK-NEXT: bne 0, .LBB2_3 -; CHECK-NEXT: # %bb.1: # %loadbb1 -; CHECK-NEXT: lhz 5, 4(3) -; CHECK-NEXT: lhz 6, 4(4) -; CHECK-NEXT: cmplw 5, 6 -; CHECK-NEXT: bne 0, .LBB2_3 -; CHECK-NEXT: # %bb.2: # %loadbb2 -; CHECK-NEXT: lbz 5, 6(3) -; CHECK-NEXT: lbz 4, 6(4) -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: cmplw 5, 4 -; CHECK-NEXT: beqlr 0 -; CHECK-NEXT: .LBB2_3: # %res_block -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: blr - %call = tail call signext i32 @memcmp(ptr %x, ptr %y, i64 7) - %not.lnot = icmp ne i32 %call, 0 - %cond = zext i1 %not.lnot to i32 - ret i32 %cond -} - -; Validate with > 0 -define signext i32 @zeroEqualityTest04() { -; CHECK-LABEL: zeroEqualityTest04: -; CHECK: # %bb.0: # %loadbb -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: blr - %call = tail call signext i32 @memcmp(ptr @zeroEqualityTest02.buffer1, ptr @zeroEqualityTest02.buffer2, i64 16) - %not.cmp = icmp slt i32 %call, 1 - %. = zext i1 %not.cmp to i32 - ret i32 %. 
-} - -; Validate with < 0 -define signext i32 @zeroEqualityTest05() { -; CHECK-LABEL: zeroEqualityTest05: -; CHECK: # %bb.0: # %loadbb -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: blr - %call = tail call signext i32 @memcmp(ptr @zeroEqualityTest03.buffer1, ptr @zeroEqualityTest03.buffer2, i64 16) - %call.lobit = lshr i32 %call, 31 - %call.lobit.not = xor i32 %call.lobit, 1 - ret i32 %call.lobit.not -} - -; Validate with memcmp()?: -define signext i32 @equalityFoldTwoConstants() { -; CHECK-LABEL: equalityFoldTwoConstants: -; CHECK: # %bb.0: # %loadbb -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: blr - %call = tail call signext i32 @memcmp(ptr @zeroEqualityTest04.buffer1, ptr @zeroEqualityTest04.buffer2, i64 16) - %not.tobool = icmp eq i32 %call, 0 - %cond = zext i1 %not.tobool to i32 - ret i32 %cond -} - -define signext i32 @equalityFoldOneConstant(ptr %X) { -; CHECK-LABEL: equalityFoldOneConstant: -; CHECK: # %bb.0: -; CHECK-NEXT: li 5, 1 -; CHECK-NEXT: ld 4, 0(3) -; CHECK-NEXT: rldic 5, 5, 32, 31 -; CHECK-NEXT: cmpld 4, 5 -; CHECK-NEXT: bne 0, .LBB6_2 -; CHECK-NEXT: # %bb.1: # %loadbb1 -; CHECK-NEXT: lis 5, -32768 -; CHECK-NEXT: ld 4, 8(3) -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: ori 5, 5, 1 -; CHECK-NEXT: rldic 5, 5, 1, 30 -; CHECK-NEXT: cmpld 4, 5 -; CHECK-NEXT: beq 0, .LBB6_3 -; CHECK-NEXT: .LBB6_2: # %res_block -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: .LBB6_3: # %endblock -; CHECK-NEXT: cntlzw 3, 3 -; CHECK-NEXT: srwi 3, 3, 5 -; CHECK-NEXT: blr - %call = tail call signext i32 @memcmp(ptr @zeroEqualityTest04.buffer1, ptr %X, i64 16) - %not.tobool = icmp eq i32 %call, 0 - %cond = zext i1 %not.tobool to i32 - ret i32 %cond -} - -define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind { -; CHECK-LABEL: length2_eq_nobuiltin_attr: -; CHECK: # %bb.0: -; CHECK-NEXT: mflr 0 -; CHECK-NEXT: stdu 1, -32(1) -; CHECK-NEXT: li 5, 2 -; CHECK-NEXT: std 0, 48(1) -; CHECK-NEXT: bl memcmp -; CHECK-NEXT: nop -; CHECK-NEXT: cntlzw 3, 3 -; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31 -; CHECK-NEXT: addi 1, 
1, 32 -; CHECK-NEXT: ld 0, 16(1) -; CHECK-NEXT: mtlr 0 -; CHECK-NEXT: blr - %m = tail call signext i32 @memcmp(ptr %X, ptr %Y, i64 2) nobuiltin - %c = icmp eq i32 %m, 0 - ret i1 %c -} - diff --git a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll b/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll deleted file mode 100644 index 29910646c8937..0000000000000 --- a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll +++ /dev/null @@ -1,39 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s -check-prefix=PPC64LE - -; This tests interaction between MergeICmp and expand-memcmp. - -%"struct.std::pair" = type { i32, i32 } - -define zeroext i1 @opeq1( -; PPC64LE-LABEL: opeq1: -; PPC64LE: # %bb.0: # %"entry+land.rhs.i" -; PPC64LE-NEXT: ld 3, 0(3) -; PPC64LE-NEXT: ld 4, 0(4) -; PPC64LE-NEXT: cmpd 3, 4 -; PPC64LE-NEXT: li 3, 0 -; PPC64LE-NEXT: li 4, 1 -; PPC64LE-NEXT: iseleq 3, 4, 3 -; PPC64LE-NEXT: blr - ptr nocapture readonly dereferenceable(8) %a, - ptr nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { -entry: - %0 = load i32, ptr %a, align 4 - %1 = load i32, ptr %b, align 4 - %cmp.i = icmp eq i32 %0, %1 - br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit - -land.rhs.i: - %second.i = getelementptr inbounds %"struct.std::pair", ptr %a, i64 0, i32 1 - %2 = load i32, ptr %second.i, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", ptr %b, i64 0, i32 1 - %3 = load i32, ptr %second2.i, align 4 - %cmp3.i = icmp eq i32 %2, %3 - br label %opeq1.exit - -opeq1.exit: - %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ] - ret i1 %4 -} - - diff --git a/llvm/test/CodeGen/PowerPC/memcmp.ll b/llvm/test/CodeGen/PowerPC/memcmp.ll deleted file mode 100644 index 0634534b9c9df..0000000000000 --- a/llvm/test/CodeGen/PowerPC/memcmp.ll +++ /dev/null @@ -1,62 +0,0 @@ -; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s -check-prefix=CHECK - -define signext i32 @memcmp8(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) { -; CHECK-LABEL: memcmp8: -; CHECK: # %bb.0: -; CHECK-NEXT: ldbrx 3, 0, 3 -; CHECK-NEXT: ldbrx 4, 0, 4 -; CHECK-NEXT: subc 5, 4, 3 -; CHECK-NEXT: subfe 5, 4, 4 -; CHECK-NEXT: subc 4, 3, 4 -; CHECK-NEXT: subfe 3, 3, 3 -; CHECK-NEXT: neg 5, 5 -; CHECK-NEXT: neg 3, 3 -; CHECK-NEXT: sub 3, 5, 3 -; CHECK-NEXT: extsw 3, 3 -; CHECK-NEXT: blr - %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 8) - ret i32 %call -} - -define signext i32 @memcmp4(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) { -; CHECK-LABEL: memcmp4: -; CHECK: # %bb.0: -; CHECK-NEXT: lwbrx 3, 0, 3 -; CHECK-NEXT: lwbrx 4, 0, 4 -; CHECK-NEXT: sub 5, 4, 3 -; CHECK-NEXT: sub 3, 3, 4 -; CHECK-NEXT: rldicl 5, 5, 1, 63 -; CHECK-NEXT: rldicl 3, 3, 1, 63 -; CHECK-NEXT: sub 3, 5, 3 -; CHECK-NEXT: extsw 3, 3 -; CHECK-NEXT: blr - %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4) - ret i32 %call -} - -define signext i32 @memcmp2(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) { -; CHECK-LABEL: memcmp2: -; CHECK: # %bb.0: -; CHECK-NEXT: lhbrx 3, 0, 3 -; CHECK-NEXT: lhbrx 4, 0, 4 -; CHECK-NEXT: sub 3, 3, 4 -; CHECK-NEXT: extsw 3, 3 -; CHECK-NEXT: blr - %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 2) - ret i32 %call -} - -define signext i32 @memcmp1(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) { -; CHECK-LABEL: memcmp1: -; CHECK: # %bb.0: -; CHECK-NEXT: lbz 3, 0(3) -; CHECK-NEXT: lbz 4, 0(4) -; CHECK-NEXT: sub 3, 3, 4 -; CHECK-NEXT: extsw 3, 3 -; CHECK-NEXT: blr - %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 1) #2 - ret i32 %call -} - -declare signext i32 @memcmp(ptr, ptr, i64) diff --git 
a/llvm/test/CodeGen/PowerPC/memcmpIR.ll b/llvm/test/CodeGen/PowerPC/memcmpIR.ll deleted file mode 100644 index 0a8bec7dc0e3f..0000000000000 --- a/llvm/test/CodeGen/PowerPC/memcmpIR.ll +++ /dev/null @@ -1,178 +0,0 @@ -; RUN: llc -o - -mtriple=powerpc64le-unknown-gnu-linux -stop-after codegenprepare %s | FileCheck %s -; RUN: llc -o - -mtriple=powerpc64-unknown-gnu-linux -stop-after codegenprepare %s | FileCheck %s --check-prefix=CHECK-BE - -define signext i32 @test1(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) { -entry: - ; CHECK-LABEL: @test1( - ; CHECK-LABEL: res_block:{{.*}} - ; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64 - ; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1 - ; CHECK-NEXT: br label %endblock - - ; CHECK-LABEL: loadbb:{{.*}} - ; CHECK: [[LOAD1:%[0-9]+]] = load i64, ptr - ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr - ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]]) - ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]]) - ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]] - ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block - - ; CHECK-LABEL: loadbb1:{{.*}} - ; CHECK-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, ptr {{.*}}, i64 8 - ; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, ptr {{.*}}, i64 8 - ; CHECK-NEXT: [[LOAD1:%[0-9]+]] = load i64, ptr [[GEP1]] - ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr [[GEP2]] - ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]]) - ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]]) - ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]] - ; CHECK-NEXT: br i1 [[ICMP]], label %endblock, label %res_block - - ; CHECK-BE-LABEL: @test1( - ; CHECK-BE-LABEL: res_block:{{.*}} - ; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64 - ; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1 - ; CHECK-BE-NEXT: br label %endblock - - ; 
CHECK-BE-LABEL: loadbb:{{.*}} - ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, ptr - ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr - ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]] - ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block - - ; CHECK-BE-LABEL: loadbb1:{{.*}} - ; CHECK-BE-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, ptr {{.*}}, i64 8 - ; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, ptr {{.*}}, i64 8 - ; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, ptr [[GEP1]] - ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr [[GEP2]] - ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]] - ; CHECK-BE-NEXT: br i1 [[ICMP]], label %endblock, label %res_block - - %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 16) - ret i32 %call -} - -declare signext i32 @memcmp(ptr nocapture, ptr nocapture, i64) local_unnamed_addr #1 - -define signext i32 @test2(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) { - ; CHECK-LABEL: @test2( - ; CHECK: [[LOAD1:%[0-9]+]] = load i32, ptr - ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr - ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]]) - ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]]) - ; CHECK-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[BSWAP1]], [[BSWAP2]] - ; CHECK-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[BSWAP1]], [[BSWAP2]] - ; CHECK-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32 - ; CHECK-NEXT: [[Z2:%[0-9]+]] = zext i1 [[CMP2]] to i32 - ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]] - ; CHECK-NEXT: ret i32 [[SUB]] - - ; CHECK-BE-LABEL: @test2( - ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, ptr - ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr - ; CHECK-BE-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[LOAD1]], [[LOAD2]] - ; CHECK-BE-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[LOAD1]], [[LOAD2]] - ; CHECK-BE-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32 - ; CHECK-BE-NEXT: [[Z2:%[0-9]+]] = zext i1 
[[CMP2]] to i32 - ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]] - ; CHECK-BE-NEXT: ret i32 [[SUB]] - -entry: - %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4) - ret i32 %call -} - -define signext i32 @test3(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) { - ; CHECK-LABEL: res_block:{{.*}} - ; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64 - ; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1 - ; CHECK-NEXT: br label %endblock - - ; CHECK-LABEL: loadbb:{{.*}} - ; CHECK: [[LOAD1:%[0-9]+]] = load i64, ptr - ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr - ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]]) - ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]]) - ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]] - ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block - - ; CHECK-LABEL: loadbb1:{{.*}} - ; CHECK: [[LOAD1:%[0-9]+]] = load i32, ptr - ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr - ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]]) - ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]]) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[BSWAP1]] to i64 - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[BSWAP2]] to i64 - ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]] - ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb2, label %res_block - - ; CHECK-LABEL: loadbb2:{{.*}} - ; CHECK: [[LOAD1:%[0-9]+]] = load i16, ptr - ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i16, ptr - ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD1]]) - ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD2]]) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[BSWAP1]] to i64 - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[BSWAP2]] to i64 - ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]] - ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb3, label %res_block - 
- ; CHECK-LABEL: loadbb3:{{.*}} - ; CHECK: [[LOAD1:%[0-9]+]] = load i8, ptr - ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i8, ptr - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32 - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i8 [[LOAD2]] to i32 - ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i32 [[ZEXT1]], [[ZEXT2]] - ; CHECK-NEXT: br label %endblock - - ; CHECK-BE-LABEL: res_block:{{.*}} - ; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64 - ; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1 - ; CHECK-BE-NEXT: br label %endblock - - ; CHECK-BE-LABEL: loadbb:{{.*}} - ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, ptr - ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr - ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]] - ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block - - ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, ptr - ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr - ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[LOAD1]] to i64 - ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[LOAD2]] to i64 - ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]] - ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb2, label %res_block - - ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i16, ptr - ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i16, ptr - ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[LOAD1]] to i64 - ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[LOAD2]] to i64 - ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]] - ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb3, label %res_block - - ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i8, ptr - ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i8, ptr - ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32 - ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i8 [[LOAD2]] to i32 - ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[ZEXT1]], [[ZEXT2]] - ; CHECK-BE-NEXT: br label %endblock - -entry: - %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 15) - ret i32 %call 
-} - ; CHECK: call = tail call signext i32 @memcmp - ; CHECK-BE: call = tail call signext i32 @memcmp -define signext i32 @test4(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) { - -entry: - %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 65) - ret i32 %call -} - -define signext i32 @test5(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2, i32 signext %SIZE) { - ; CHECK: call = tail call signext i32 @memcmp - ; CHECK-BE: call = tail call signext i32 @memcmp -entry: - %conv = sext i32 %SIZE to i64 - %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 %conv) - ret i32 %call -} diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 364c1e430b915..1401fd004d919 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -45,13 +45,6 @@ ; CHECK-NEXT: Canonicalize Freeze Instructions in Loops ; CHECK-NEXT: Induction Variable Users ; CHECK-NEXT: Loop Strength Reduction -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Merge contiguous icmps into a memcmp -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Expand memcmp() to load/stores ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering ; CHECK-NEXT: Lower constant intrinsics @@ -194,7 +187,7 @@ ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: Stack Frame Layout Analysis ; CHECK-NEXT: RISC-V Zcmp move merging pass -; CHECK-NEXT: RISC-V Zcmp Push/Pop optimization pass +; CHECK-NEXT: RISC-V Zcmp Push/Pop optimization pass ; CHECK-NEXT: RISC-V pseudo instruction expansion pass ; CHECK-NEXT: RISC-V atomic pseudo instruction expansion pass ; CHECK-NEXT: Unpack machine instruction bundles diff --git a/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll 
b/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll deleted file mode 100644 index c16e2adb7a078..0000000000000 --- a/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll +++ /dev/null @@ -1,49 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 - -; This tests interaction between MergeICmp and ExpandMemCmp. - -%"struct.std::pair" = type { i32, i32 } - -define zeroext i1 @opeq1( -; X86-LABEL: opeq1: -; X86: # %bb.0: # %"entry+land.rhs.i" -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: movl 4(%ecx), %ecx -; X86-NEXT: xorl (%eax), %edx -; X86-NEXT: xorl 4(%eax), %ecx -; X86-NEXT: orl %edx, %ecx -; X86-NEXT: sete %al -; X86-NEXT: retl -; -; X64-LABEL: opeq1: -; X64: # %bb.0: # %"entry+land.rhs.i" -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: cmpq (%rsi), %rax -; X64-NEXT: sete %al -; X64-NEXT: retq - ptr nocapture readonly dereferenceable(8) %a, - ptr nocapture readonly dereferenceable(8) %b) local_unnamed_addr nofree nosync { -entry: - %0 = load i32, ptr %a, align 4 - %1 = load i32, ptr %b, align 4 - %cmp.i = icmp eq i32 %0, %1 - br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit - -land.rhs.i: - %second.i = getelementptr inbounds %"struct.std::pair", ptr %a, i64 0, i32 1 - %2 = load i32, ptr %second.i, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", ptr %b, i64 0, i32 1 - %3 = load i32, ptr %second2.i, align 4 - %cmp3.i = icmp eq i32 %2, %3 - br label %opeq1.exit - -opeq1.exit: - %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ] - ret i1 %4 -} - - diff --git a/llvm/test/CodeGen/X86/memcmp-minsize-x32.ll b/llvm/test/CodeGen/X86/memcmp-minsize-x32.ll deleted file mode 100644 index ae1320f8b0868..0000000000000 --- a/llvm/test/CodeGen/X86/memcmp-minsize-x32.ll +++ /dev/null @@ -1,445 +0,0 
@@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefix=X86 --check-prefix=X86-NOSSE -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE2 - -; This tests codegen time inlining/optimization of memcmp -; rdar://6480398 - -@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1 - -declare dso_local i32 @memcmp(ptr, ptr, i32) - -define i32 @length2(ptr %X, ptr %Y) nounwind minsize { -; X86-LABEL: length2: -; X86: # %bb.0: -; X86-NEXT: pushl $2 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind - ret i32 %m -} - -define i1 @length2_eq(ptr %X, ptr %Y) nounwind minsize { -; X86-LABEL: length2_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: cmpw (%eax), %cx -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_const(ptr %X) nounwind minsize { -; X86-LABEL: length2_eq_const: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpw $12849, (%eax) # imm = 0x3231 -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind minsize { -; X86-LABEL: length2_eq_nobuiltin_attr: -; X86: # %bb.0: -; X86-NEXT: pushl $2 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax 
-; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length3(ptr %X, ptr %Y) nounwind minsize { -; X86-LABEL: length3: -; X86: # %bb.0: -; X86-NEXT: pushl $3 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind - ret i32 %m -} - -define i1 @length3_eq(ptr %X, ptr %Y) nounwind minsize { -; X86-LABEL: length3_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $3 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length4(ptr %X, ptr %Y) nounwind minsize { -; X86-LABEL: length4: -; X86: # %bb.0: -; X86-NEXT: pushl $4 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind - ret i32 %m -} - -define i1 @length4_eq(ptr %X, ptr %Y) nounwind minsize { -; X86-LABEL: length4_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: cmpl (%eax), %ecx -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length4_eq_const(ptr %X) nounwind minsize { -; X86-LABEL: length4_eq_const: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231 -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 4) 
nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length5(ptr %X, ptr %Y) nounwind minsize { -; X86-LABEL: length5: -; X86: # %bb.0: -; X86-NEXT: pushl $5 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind - ret i32 %m -} - -define i1 @length5_eq(ptr %X, ptr %Y) nounwind minsize { -; X86-LABEL: length5_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $5 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length8(ptr %X, ptr %Y) nounwind minsize { -; X86-LABEL: length8: -; X86: # %bb.0: -; X86-NEXT: pushl $8 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind - ret i32 %m -} - -define i1 @length8_eq(ptr %X, ptr %Y) nounwind minsize { -; X86-LABEL: length8_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $8 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length8_eq_const(ptr %X) nounwind minsize { -; X86-LABEL: length8_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $8 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - 
-define i1 @length12_eq(ptr %X, ptr %Y) nounwind minsize { -; X86-LABEL: length12_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $12 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length12(ptr %X, ptr %Y) nounwind minsize { -; X86-LABEL: length12: -; X86: # %bb.0: -; X86-NEXT: pushl $12 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind - ret i32 %m -} - -; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 - -define i32 @length16(ptr %X, ptr %Y) nounwind minsize { -; X86-LABEL: length16: -; X86: # %bb.0: -; X86-NEXT: pushl $16 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind - ret i32 %m -} - -define i1 @length16_eq(ptr %x, ptr %y) nounwind minsize { -; X86-NOSSE-LABEL: length16_eq: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $16 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE2-LABEL: length16_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu (%eax), %xmm1 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; X86-SSE2-NEXT: pmovmskb %xmm1, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl - %call = tail call i32 
@memcmp(ptr %x, ptr %y, i32 16) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_eq_const(ptr %X) nounwind minsize { -; X86-NOSSE-LABEL: length16_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $16 -; X86-NOSSE-NEXT: pushl $.L.str -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE2-LABEL: length16_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 - -define i32 @length24(ptr %X, ptr %Y) nounwind minsize { -; X86-LABEL: length24: -; X86: # %bb.0: -; X86-NEXT: pushl $24 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind - ret i32 %m -} - -define i1 @length24_eq(ptr %x, ptr %y) nounwind minsize { -; X86-LABEL: length24_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $24 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_eq_const(ptr %X) nounwind minsize { -; X86-LABEL: length24_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $24 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl 
$12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length32(ptr %X, ptr %Y) nounwind minsize { -; X86-LABEL: length32: -; X86: # %bb.0: -; X86-NEXT: pushl $32 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind - ret i32 %m -} - -; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 - -define i1 @length32_eq(ptr %x, ptr %y) nounwind minsize { -; X86-LABEL: length32_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $32 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_eq_const(ptr %X) nounwind minsize { -; X86-LABEL: length32_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $32 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length64(ptr %X, ptr %Y) nounwind minsize { -; X86-LABEL: length64: -; X86: # %bb.0: -; X86-NEXT: pushl $64 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind - ret i32 %m -} - -define i1 @length64_eq(ptr %x, ptr %y) nounwind minsize { -; X86-LABEL: length64_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $64 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; 
X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_eq_const(ptr %X) nounwind minsize { -; X86-LABEL: length64_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $64 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - diff --git a/llvm/test/CodeGen/X86/memcmp-minsize.ll b/llvm/test/CodeGen/X86/memcmp-minsize.ll deleted file mode 100644 index 544d1c49f26b9..0000000000000 --- a/llvm/test/CodeGen/X86/memcmp-minsize.ll +++ /dev/null @@ -1,433 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2 - -; This tests codegen time inlining/optimization of memcmp -; rdar://6480398 - -@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1 - -declare dso_local i32 @memcmp(ptr, ptr, i64) - -define i32 @length2(ptr %X, ptr %Y) nounwind minsize { -; X64-LABEL: length2: -; X64: # %bb.0: -; X64-NEXT: pushq $2 -; X64-NEXT: popq %rdx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - ret i32 %m -} - -define i1 @length2_eq(ptr %X, ptr %Y) nounwind minsize { -; X64-LABEL: length2_eq: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax 
-; X64-NEXT: cmpw (%rsi), %ax -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_const(ptr %X) nounwind minsize { -; X64-LABEL: length2_eq_const: -; X64: # %bb.0: -; X64-NEXT: cmpw $12849, (%rdi) # imm = 0x3231 -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind minsize { -; X64-LABEL: length2_eq_nobuiltin_attr: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: pushq $2 -; X64-NEXT: popq %rdx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length3(ptr %X, ptr %Y) nounwind minsize { -; X64-LABEL: length3: -; X64: # %bb.0: -; X64-NEXT: pushq $3 -; X64-NEXT: popq %rdx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind - ret i32 %m -} - -define i1 @length3_eq(ptr %X, ptr %Y) nounwind minsize { -; X64-LABEL: length3_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: pushq $3 -; X64-NEXT: popq %rdx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setne %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length4(ptr %X, ptr %Y) nounwind minsize { -; X64-LABEL: length4: -; X64: # %bb.0: -; X64-NEXT: pushq $4 -; X64-NEXT: popq %rdx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - ret i32 %m -} - -define i1 @length4_eq(ptr %X, ptr %Y) nounwind minsize { -; X64-LABEL: length4_eq: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; 
X64-NEXT: cmpl (%rsi), %eax -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length4_eq_const(ptr %X) nounwind minsize { -; X64-LABEL: length4_eq_const: -; X64: # %bb.0: -; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231 -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length5(ptr %X, ptr %Y) nounwind minsize { -; X64-LABEL: length5: -; X64: # %bb.0: -; X64-NEXT: pushq $5 -; X64-NEXT: popq %rdx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind - ret i32 %m -} - -define i1 @length5_eq(ptr %X, ptr %Y) nounwind minsize { -; X64-LABEL: length5_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: pushq $5 -; X64-NEXT: popq %rdx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setne %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length8(ptr %X, ptr %Y) nounwind minsize { -; X64-LABEL: length8: -; X64: # %bb.0: -; X64-NEXT: pushq $8 -; X64-NEXT: popq %rdx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind - ret i32 %m -} - -define i1 @length8_eq(ptr %X, ptr %Y) nounwind minsize { -; X64-LABEL: length8_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: cmpq (%rsi), %rax -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length8_eq_const(ptr %X) nounwind minsize { -; X64-LABEL: length8_eq_const: -; X64: # %bb.0: -; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130 -; X64-NEXT: cmpq %rax, (%rdi) -; X64-NEXT: setne %al -; X64-NEXT: retq - %m 
= tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length12_eq(ptr %X, ptr %Y) nounwind minsize { -; X64-LABEL: length12_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: pushq $12 -; X64-NEXT: popq %rdx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setne %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length12(ptr %X, ptr %Y) nounwind minsize { -; X64-LABEL: length12: -; X64: # %bb.0: -; X64-NEXT: pushq $12 -; X64-NEXT: popq %rdx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind - ret i32 %m -} - -; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 - -define i32 @length16(ptr %X, ptr %Y) nounwind minsize { -; -; X64-LABEL: length16: -; X64: # %bb.0: -; X64-NEXT: pushq $16 -; X64-NEXT: popq %rdx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind - ret i32 %m -} - -define i1 @length16_eq(ptr %x, ptr %y) nounwind minsize { -; X64-SSE2-LABEL: length16_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rsi), %xmm0 -; X64-SSE2-NEXT: movdqu (%rdi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; X64-SSE2-NEXT: pmovmskb %xmm1, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-AVX-LABEL: length16_eq: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: setne %al -; X64-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_eq_const(ptr %X) nounwind minsize { -; X64-SSE2-LABEL: length16_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-AVX-LABEL: length16_eq_const: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: sete %al -; X64-AVX-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 - -define i32 @length24(ptr %X, ptr %Y) nounwind minsize { -; X64-LABEL: length24: -; X64: # %bb.0: -; X64-NEXT: pushq $24 -; X64-NEXT: popq %rdx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind - ret i32 %m -} - -define i1 @length24_eq(ptr %x, ptr %y) nounwind minsize { -; X64-LABEL: length24_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: pushq $24 -; X64-NEXT: popq %rdx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_eq_const(ptr %X) nounwind minsize { -; X64-LABEL: length24_eq_const: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: pushq $24 -; X64-NEXT: popq %rdx -; X64-NEXT: movl $.L.str, %esi -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setne %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length32(ptr %X, ptr %Y) nounwind minsize { -; X64-LABEL: length32: -; X64: # %bb.0: -; X64-NEXT: pushq $32 -; X64-NEXT: popq %rdx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind - ret i32 %m -} - -; PR33325 - 
https://bugs.llvm.org/show_bug.cgi?id=33325 - -define i1 @length32_eq(ptr %x, ptr %y) nounwind minsize { -; X64-SSE2-LABEL: length32_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: pushq %rax -; X64-SSE2-NEXT: pushq $32 -; X64-SSE2-NEXT: popq %rdx -; X64-SSE2-NEXT: callq memcmp -; X64-SSE2-NEXT: testl %eax, %eax -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: popq %rcx -; X64-SSE2-NEXT: retq -; -; X64-AVX1-LABEL: length32_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length32_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_eq_const(ptr %X) nounwind minsize { -; X64-SSE2-LABEL: length32_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: pushq %rax -; X64-SSE2-NEXT: pushq $32 -; X64-SSE2-NEXT: popq %rdx -; X64-SSE2-NEXT: movl $.L.str, %esi -; X64-SSE2-NEXT: callq memcmp -; X64-SSE2-NEXT: testl %eax, %eax -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: popq %rcx -; X64-SSE2-NEXT: retq -; -; X64-AVX1-LABEL: length32_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length32_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq - %m = tail 
call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length64(ptr %X, ptr %Y) nounwind minsize { -; X64-LABEL: length64: -; X64: # %bb.0: -; X64-NEXT: pushq $64 -; X64-NEXT: popq %rdx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind - ret i32 %m -} - -define i1 @length64_eq(ptr %x, ptr %y) nounwind minsize { -; X64-LABEL: length64_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: pushq $64 -; X64-NEXT: popq %rdx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setne %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_eq_const(ptr %X) nounwind minsize { -; X64-LABEL: length64_eq_const: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: pushq $64 -; X64-NEXT: popq %rdx -; X64-NEXT: movl $.L.str, %esi -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll deleted file mode 100644 index 0253d13122608..0000000000000 --- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll +++ /dev/null @@ -1,2911 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; NOTE: This is a copy of llvm/test/CodeGen/X86/memcmp.ll with more load pairs. Please keep it that way. 
-; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=X86,X86-SSE1 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86,X86-SSE41 - -; This tests codegen time inlining/optimization of memcmp -; rdar://6480398 - -@.str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1 - -declare dso_local i32 @memcmp(ptr, ptr, i32) - -define i32 @length0(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length0: -; X86: # %bb.0: -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind - ret i32 %m - } - -define i1 @length0_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length0_eq: -; X86: # %bb.0: -; X86-NEXT: movb $1, %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length0_lt(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length0_lt: -; X86: # %bb.0: -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind - %c = 
icmp slt i32 %m, 0 - ret i1 %c -} - -define i32 @length2(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length2: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind - ret i32 %m -} - -define i1 @length2_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length2_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: cmpw (%eax), %cx -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length2_lt(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length2_lt: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i1 @length2_gt(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length2_gt: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %eax -; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %ax -; X86-NEXT: movzwl %cx, %ecx -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: subl %eax, %ecx -; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: setg %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind - %c = icmp 
sgt i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_const(ptr %X) nounwind { -; X86-LABEL: length2_eq_const: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl (%eax), %eax -; X86-NEXT: cmpl $12849, %eax # imm = 0x3231 -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length2_eq_nobuiltin_attr: -; X86: # %bb.0: -; X86-NEXT: pushl $2 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length3(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length3: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: movzwl (%ecx), %esi -; X86-NEXT: rolw $8, %dx -; X86-NEXT: rolw $8, %si -; X86-NEXT: cmpw %si, %dx -; X86-NEXT: jne .LBB9_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 2(%eax), %eax -; X86-NEXT: movzbl 2(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB9_3: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpw %si, %dx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind - ret i32 %m -} - -define i1 @length3_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length3_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %edx -; X86-NEXT: xorw (%eax), %dx -; X86-NEXT: movzbl 2(%ecx), %ecx -; X86-NEXT: xorb 
2(%eax), %cl -; X86-NEXT: movzbl %cl, %eax -; X86-NEXT: orw %dx, %ax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length4(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length4: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: seta %al -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind - ret i32 %m -} - -define i1 @length4_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length4_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: cmpl (%eax), %ecx -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length4_lt(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length4_lt: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: movl (%eax), %eax -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %eax -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: setb %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i1 @length4_gt(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length4_gt: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: movl (%eax), %eax -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %eax -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: seta %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind - %c = icmp sgt i32 %m, 0 - ret i1 %c -} - 
-define i1 @length4_eq_const(ptr %X) nounwind { -; X86-LABEL: length4_eq_const: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231 -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length5(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length5: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: movl (%ecx), %esi -; X86-NEXT: bswapl %edx -; X86-NEXT: bswapl %esi -; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB16_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB16_3: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind - ret i32 %m -} - -define i1 @length5_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length5_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: xorl (%eax), %edx -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: xorb 4(%eax), %cl -; X86-NEXT: movzbl %cl, %eax -; X86-NEXT: orl %edx, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length5_lt(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length5_lt: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: movl (%ecx), %esi -; X86-NEXT: bswapl %edx -; X86-NEXT: 
bswapl %esi -; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB18_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB18_2 -; X86-NEXT: .LBB18_3: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: .LBB18_2: # %endblock -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i32 @length7(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length7: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB19_2 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 3(%esi), %ecx -; X86-NEXT: movl 3(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB19_3 -; X86-NEXT: .LBB19_2: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: .LBB19_3: # %endblock -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind - ret i32 %m -} - -define i1 @length7_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length7_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: movl 3(%ecx), %ecx -; X86-NEXT: xorl (%eax), %edx -; X86-NEXT: xorl 3(%eax), %ecx -; X86-NEXT: orl %edx, %ecx -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - 
-define i1 @length7_lt(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length7_lt: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB21_2 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 3(%esi), %ecx -; X86-NEXT: movl 3(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB21_3 -; X86-NEXT: .LBB21_2: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: .LBB21_3: # %endblock -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i32 @length8(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length8: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB22_2 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB22_3 -; X86-NEXT: .LBB22_2: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: .LBB22_3: # %endblock -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind - ret i32 %m -} - -define i1 @length8_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length8_eq: -; X86: # %bb.0: -; 
X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: movl 4(%ecx), %ecx -; X86-NEXT: xorl (%eax), %edx -; X86-NEXT: xorl 4(%eax), %ecx -; X86-NEXT: orl %edx, %ecx -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length8_eq_const(ptr %X) nounwind { -; X86-LABEL: length8_eq_const: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $858927408, %ecx # imm = 0x33323130 -; X86-NEXT: xorl (%eax), %ecx -; X86-NEXT: movl $926299444, %edx # imm = 0x37363534 -; X86-NEXT: xorl 4(%eax), %edx -; X86-NEXT: orl %ecx, %edx -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length9_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length9_eq: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: movl 4(%ecx), %esi -; X86-NEXT: xorl (%eax), %edx -; X86-NEXT: xorl 4(%eax), %esi -; X86-NEXT: orl %edx, %esi -; X86-NEXT: movzbl 8(%ecx), %ecx -; X86-NEXT: xorb 8(%eax), %cl -; X86-NEXT: movzbl %cl, %eax -; X86-NEXT: orl %esi, %eax -; X86-NEXT: sete %al -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length10_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length10_eq: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: movl 4(%ecx), %esi -; X86-NEXT: xorl (%eax), %edx -; X86-NEXT: xorl 4(%eax), %esi -; X86-NEXT: orl %edx, %esi -; X86-NEXT: movzwl 8(%ecx), %ecx -; X86-NEXT: xorw 8(%eax), %cx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: orl %esi, %eax -; X86-NEXT: sete 
%al -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 10) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length11_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length11_eq: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: movl 4(%ecx), %esi -; X86-NEXT: xorl (%eax), %edx -; X86-NEXT: xorl 4(%eax), %esi -; X86-NEXT: orl %edx, %esi -; X86-NEXT: movl 7(%ecx), %ecx -; X86-NEXT: xorl 7(%eax), %ecx -; X86-NEXT: orl %esi, %ecx -; X86-NEXT: sete %al -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 11) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length12_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length12_eq: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: movl 4(%ecx), %esi -; X86-NEXT: xorl (%eax), %edx -; X86-NEXT: xorl 4(%eax), %esi -; X86-NEXT: orl %edx, %esi -; X86-NEXT: movl 8(%ecx), %ecx -; X86-NEXT: xorl 8(%eax), %ecx -; X86-NEXT: orl %esi, %ecx -; X86-NEXT: setne %al -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length12(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length12: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB29_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB29_3 -; X86-NEXT: # %bb.2: # %loadbb2 -; X86-NEXT: movl 
8(%esi), %ecx -; X86-NEXT: movl 8(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB29_4 -; X86-NEXT: .LBB29_3: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: .LBB29_4: # %endblock -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind - ret i32 %m -} - -define i1 @length13_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length13_eq: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl (%edx), %esi -; X86-NEXT: movl 4(%edx), %eax -; X86-NEXT: xorl (%ecx), %esi -; X86-NEXT: xorl 4(%ecx), %eax -; X86-NEXT: orl %esi, %eax -; X86-NEXT: movl 8(%edx), %esi -; X86-NEXT: xorl 8(%ecx), %esi -; X86-NEXT: movzbl 12(%edx), %edx -; X86-NEXT: xorb 12(%ecx), %dl -; X86-NEXT: movzbl %dl, %ecx -; X86-NEXT: orl %esi, %ecx -; X86-NEXT: orl %eax, %ecx -; X86-NEXT: sete %al -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 13) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length14_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length14_eq: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl (%edx), %esi -; X86-NEXT: movl 4(%edx), %eax -; X86-NEXT: xorl (%ecx), %esi -; X86-NEXT: xorl 4(%ecx), %eax -; X86-NEXT: orl %esi, %eax -; X86-NEXT: movl 8(%edx), %esi -; X86-NEXT: xorl 8(%ecx), %esi -; X86-NEXT: movzwl 12(%edx), %edx -; X86-NEXT: xorw 12(%ecx), %dx -; X86-NEXT: movzwl %dx, %ecx -; X86-NEXT: orl %esi, %ecx -; X86-NEXT: orl %eax, %ecx -; X86-NEXT: sete %al -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 14) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length15_eq(ptr %X, ptr 
%Y) nounwind { -; X86-LABEL: length15_eq: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl (%edx), %esi -; X86-NEXT: movl 4(%edx), %eax -; X86-NEXT: xorl (%ecx), %esi -; X86-NEXT: xorl 4(%ecx), %eax -; X86-NEXT: orl %esi, %eax -; X86-NEXT: movl 8(%edx), %esi -; X86-NEXT: xorl 8(%ecx), %esi -; X86-NEXT: movl 11(%edx), %edx -; X86-NEXT: xorl 11(%ecx), %edx -; X86-NEXT: orl %esi, %edx -; X86-NEXT: orl %eax, %edx -; X86-NEXT: sete %al -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 15) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 - -define i32 @length16(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length16: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB33_4 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB33_4 -; X86-NEXT: # %bb.2: # %loadbb2 -; X86-NEXT: movl 8(%esi), %ecx -; X86-NEXT: movl 8(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB33_4 -; X86-NEXT: # %bb.3: # %loadbb3 -; X86-NEXT: movl 12(%esi), %ecx -; X86-NEXT: movl 12(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB33_5 -; X86-NEXT: .LBB33_4: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: .LBB33_5: # %endblock -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, 
i32 16) nounwind - ret i32 %m -} - -define i1 @length16_eq(ptr %x, ptr %y) nounwind { -; X86-NOSSE-LABEL: length16_eq: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl %esi -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOSSE-NEXT: movl (%edx), %esi -; X86-NOSSE-NEXT: movl 4(%edx), %eax -; X86-NOSSE-NEXT: xorl (%ecx), %esi -; X86-NOSSE-NEXT: xorl 4(%ecx), %eax -; X86-NOSSE-NEXT: orl %esi, %eax -; X86-NOSSE-NEXT: movl 8(%edx), %esi -; X86-NOSSE-NEXT: xorl 8(%ecx), %esi -; X86-NOSSE-NEXT: movl 12(%edx), %edx -; X86-NOSSE-NEXT: xorl 12(%ecx), %edx -; X86-NOSSE-NEXT: orl %esi, %edx -; X86-NOSSE-NEXT: orl %eax, %edx -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: popl %esi -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length16_eq: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl %esi -; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE1-NEXT: movl (%edx), %esi -; X86-SSE1-NEXT: movl 4(%edx), %eax -; X86-SSE1-NEXT: xorl (%ecx), %esi -; X86-SSE1-NEXT: xorl 4(%ecx), %eax -; X86-SSE1-NEXT: orl %esi, %eax -; X86-SSE1-NEXT: movl 8(%edx), %esi -; X86-SSE1-NEXT: xorl 8(%ecx), %esi -; X86-SSE1-NEXT: movl 12(%edx), %edx -; X86-SSE1-NEXT: xorl 12(%ecx), %edx -; X86-SSE1-NEXT: orl %esi, %edx -; X86-SSE1-NEXT: orl %eax, %edx -; X86-SSE1-NEXT: setne %al -; X86-SSE1-NEXT: popl %esi -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length16_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu (%eax), %xmm1 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; X86-SSE2-NEXT: pmovmskb %xmm1, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length16_eq: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE41-NEXT: movdqu (%ecx), 
%xmm0 -; X86-SSE41-NEXT: movdqu (%eax), %xmm1 -; X86-SSE41-NEXT: pxor %xmm0, %xmm1 -; X86-SSE41-NEXT: ptest %xmm1, %xmm1 -; X86-SSE41-NEXT: setne %al -; X86-SSE41-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length16_lt: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB35_4 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB35_4 -; X86-NEXT: # %bb.2: # %loadbb2 -; X86-NEXT: movl 8(%esi), %ecx -; X86-NEXT: movl 8(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB35_4 -; X86-NEXT: # %bb.3: # %loadbb3 -; X86-NEXT: movl 12(%esi), %ecx -; X86-NEXT: movl 12(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB35_5 -; X86-NEXT: .LBB35_4: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: .LBB35_5: # %endblock -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: popl %esi -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length16_gt: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %eax -; X86-NEXT: movl (%edx), %ecx -; X86-NEXT: bswapl %eax 
-; X86-NEXT: bswapl %ecx -; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: jne .LBB36_4 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %eax -; X86-NEXT: movl 4(%edx), %ecx -; X86-NEXT: bswapl %eax -; X86-NEXT: bswapl %ecx -; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: jne .LBB36_4 -; X86-NEXT: # %bb.2: # %loadbb2 -; X86-NEXT: movl 8(%esi), %eax -; X86-NEXT: movl 8(%edx), %ecx -; X86-NEXT: bswapl %eax -; X86-NEXT: bswapl %ecx -; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: jne .LBB36_4 -; X86-NEXT: # %bb.3: # %loadbb3 -; X86-NEXT: movl 12(%esi), %eax -; X86-NEXT: movl 12(%edx), %ecx -; X86-NEXT: bswapl %eax -; X86-NEXT: bswapl %ecx -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: je .LBB36_5 -; X86-NEXT: .LBB36_4: # %res_block -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: sbbl %edx, %edx -; X86-NEXT: orl $1, %edx -; X86-NEXT: .LBB36_5: # %endblock -; X86-NEXT: testl %edx, %edx -; X86-NEXT: setg %al -; X86-NEXT: popl %esi -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_eq_const(ptr %X) nounwind { -; X86-NOSSE-LABEL: length16_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl %esi -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl $858927408, %ecx # imm = 0x33323130 -; X86-NOSSE-NEXT: xorl (%eax), %ecx -; X86-NOSSE-NEXT: movl $926299444, %edx # imm = 0x37363534 -; X86-NOSSE-NEXT: xorl 4(%eax), %edx -; X86-NOSSE-NEXT: orl %ecx, %edx -; X86-NOSSE-NEXT: movl $825243960, %ecx # imm = 0x31303938 -; X86-NOSSE-NEXT: xorl 8(%eax), %ecx -; X86-NOSSE-NEXT: movl $892613426, %esi # imm = 0x35343332 -; X86-NOSSE-NEXT: xorl 12(%eax), %esi -; X86-NOSSE-NEXT: orl %ecx, %esi -; X86-NOSSE-NEXT: orl %edx, %esi -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: popl %esi -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length16_eq_const: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl %esi -; X86-SSE1-NEXT: movl 
{{[0-9]+}}(%esp), %eax -; X86-SSE1-NEXT: movl $858927408, %ecx # imm = 0x33323130 -; X86-SSE1-NEXT: xorl (%eax), %ecx -; X86-SSE1-NEXT: movl $926299444, %edx # imm = 0x37363534 -; X86-SSE1-NEXT: xorl 4(%eax), %edx -; X86-SSE1-NEXT: orl %ecx, %edx -; X86-SSE1-NEXT: movl $825243960, %ecx # imm = 0x31303938 -; X86-SSE1-NEXT: xorl 8(%eax), %ecx -; X86-SSE1-NEXT: movl $892613426, %esi # imm = 0x35343332 -; X86-SSE1-NEXT: xorl 12(%eax), %esi -; X86-SSE1-NEXT: orl %ecx, %esi -; X86-SSE1-NEXT: orl %edx, %esi -; X86-SSE1-NEXT: sete %al -; X86-SSE1-NEXT: popl %esi -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length16_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length16_eq_const: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movdqu (%eax), %xmm0 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: sete %al -; X86-SSE41-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 - -define i32 @length24(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length24: -; X86: # %bb.0: -; X86-NEXT: pushl $24 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind - ret i32 %m -} - -define i1 @length24_eq(ptr %x, ptr %y) nounwind { -; X86-NOSSE-LABEL: length24_eq: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $24 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; 
X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length24_eq: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $24 -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: sete %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length24_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 8(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 8(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length24_eq: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE41-NEXT: movdqu 8(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu (%eax), %xmm2 -; X86-SSE41-NEXT: pxor %xmm0, %xmm2 -; X86-SSE41-NEXT: movdqu 8(%eax), %xmm0 -; X86-SSE41-NEXT: pxor %xmm1, %xmm0 -; X86-SSE41-NEXT: por %xmm2, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: sete %al -; X86-SSE41-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length24_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $24 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 
@memcmp(ptr %x, ptr %y, i32 24) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length24_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $24 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_eq_const(ptr %X) nounwind { -; X86-NOSSE-LABEL: length24_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $24 -; X86-NOSSE-NEXT: pushl $.L.str -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length24_eq_const: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $24 -; X86-SSE1-NEXT: pushl $.L.str -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: setne %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length24_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: movdqu 8(%eax), %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pand %xmm1, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length24_eq_const: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movdqu (%eax), %xmm0 -; X86-SSE41-NEXT: movdqu 8(%eax), %xmm1 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; 
X86-SSE41-NEXT: por %xmm1, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: setne %al -; X86-SSE41-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length31(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length31: -; X86: # %bb.0: -; X86-NEXT: pushl $31 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 31) nounwind - ret i32 %m -} - -define i1 @length31_eq(ptr %x, ptr %y) nounwind { -; X86-NOSSE-LABEL: length31_eq: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $31 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length31_eq: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $31 -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: sete %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length31_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 15(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 15(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length31_eq: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE41-NEXT: movdqu 
(%ecx), %xmm0 -; X86-SSE41-NEXT: movdqu 15(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu (%eax), %xmm2 -; X86-SSE41-NEXT: pxor %xmm0, %xmm2 -; X86-SSE41-NEXT: movdqu 15(%eax), %xmm0 -; X86-SSE41-NEXT: pxor %xmm1, %xmm0 -; X86-SSE41-NEXT: por %xmm2, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: sete %al -; X86-SSE41-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length31_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $31 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length31_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $31 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { -; X86-NOSSE-LABEL: length31_eq_prefer128: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $31 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length31_eq_prefer128: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $31 -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, 
%esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: sete %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length31_eq_prefer128: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 15(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 15(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length31_eq_prefer128: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE41-NEXT: movdqu 15(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu (%eax), %xmm2 -; X86-SSE41-NEXT: pxor %xmm0, %xmm2 -; X86-SSE41-NEXT: movdqu 15(%eax), %xmm0 -; X86-SSE41-NEXT: pxor %xmm1, %xmm0 -; X86-SSE41-NEXT: por %xmm2, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: sete %al -; X86-SSE41-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_eq_const(ptr %X) nounwind { -; X86-NOSSE-LABEL: length31_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $31 -; X86-NOSSE-NEXT: pushl $.L.str -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length31_eq_const: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $31 -; X86-SSE1-NEXT: pushl $.L.str -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: setne %al -; X86-SSE1-NEXT: retl -; -; 
X86-SSE2-LABEL: length31_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: movdqu 15(%eax), %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pand %xmm1, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length31_eq_const: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movdqu (%eax), %xmm0 -; X86-SSE41-NEXT: movdqu 15(%eax), %xmm1 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE41-NEXT: por %xmm1, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: setne %al -; X86-SSE41-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 31) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length32(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length32: -; X86: # %bb.0: -; X86-NEXT: pushl $32 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind - ret i32 %m -} - -; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 - -define i1 @length32_eq(ptr %x, ptr %y) nounwind { -; X86-NOSSE-LABEL: length32_eq: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $32 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length32_eq: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $32 -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; 
X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: sete %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length32_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length32_eq: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu (%eax), %xmm2 -; X86-SSE41-NEXT: pxor %xmm0, %xmm2 -; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE41-NEXT: pxor %xmm1, %xmm0 -; X86-SSE41-NEXT: por %xmm2, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: sete %al -; X86-SSE41-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length32_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $32 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length32_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $32 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl 
$12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { -; X86-NOSSE-LABEL: length32_eq_prefer128: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $32 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length32_eq_prefer128: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $32 -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: sete %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length32_eq_prefer128: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length32_eq_prefer128: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu (%eax), %xmm2 -; X86-SSE41-NEXT: pxor %xmm0, %xmm2 -; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE41-NEXT: pxor %xmm1, %xmm0 -; X86-SSE41-NEXT: por %xmm2, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; 
X86-SSE41-NEXT: sete %al -; X86-SSE41-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_eq_const(ptr %X) nounwind { -; X86-NOSSE-LABEL: length32_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $32 -; X86-NOSSE-NEXT: pushl $.L.str -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length32_eq_const: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $32 -; X86-SSE1-NEXT: pushl $.L.str -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: setne %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length32_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pand %xmm1, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length32_eq_const: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movdqu (%eax), %xmm0 -; X86-SSE41-NEXT: movdqu 16(%eax), %xmm1 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE41-NEXT: por %xmm1, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: setne %al -; X86-SSE41-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length48(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length48: -; X86: # %bb.0: -; X86-NEXT: pushl $48 -; X86-NEXT: 
pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 48) nounwind - ret i32 %m -} - -define i1 @length48_eq(ptr %x, ptr %y) nounwind { -; X86-NOSSE-LABEL: length48_eq: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $48 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length48_eq: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $48 -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: sete %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length48_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: movdqu 32(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu 32(%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm2 -; X86-SSE2-NEXT: pand %xmm0, %xmm2 -; X86-SSE2-NEXT: pmovmskb %xmm2, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length48_eq: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu (%eax), %xmm2 -; X86-SSE41-NEXT: pxor %xmm0, %xmm2 -; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE41-NEXT: pxor 
%xmm1, %xmm0 -; X86-SSE41-NEXT: por %xmm2, %xmm0 -; X86-SSE41-NEXT: movdqu 32(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu 32(%eax), %xmm2 -; X86-SSE41-NEXT: pxor %xmm1, %xmm2 -; X86-SSE41-NEXT: por %xmm0, %xmm2 -; X86-SSE41-NEXT: ptest %xmm2, %xmm2 -; X86-SSE41-NEXT: sete %al -; X86-SSE41-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length48_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $48 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length48_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $48 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { -; X86-NOSSE-LABEL: length48_eq_prefer128: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $48 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length48_eq_prefer128: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $48 -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; 
X86-SSE1-NEXT: sete %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length48_eq_prefer128: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: movdqu 32(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu 32(%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm2 -; X86-SSE2-NEXT: pand %xmm0, %xmm2 -; X86-SSE2-NEXT: pmovmskb %xmm2, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length48_eq_prefer128: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu (%eax), %xmm2 -; X86-SSE41-NEXT: pxor %xmm0, %xmm2 -; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE41-NEXT: pxor %xmm1, %xmm0 -; X86-SSE41-NEXT: por %xmm2, %xmm0 -; X86-SSE41-NEXT: movdqu 32(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu 32(%eax), %xmm2 -; X86-SSE41-NEXT: pxor %xmm1, %xmm2 -; X86-SSE41-NEXT: por %xmm0, %xmm2 -; X86-SSE41-NEXT: ptest %xmm2, %xmm2 -; X86-SSE41-NEXT: sete %al -; X86-SSE41-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_eq_const(ptr %X) nounwind { -; X86-NOSSE-LABEL: length48_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $48 -; X86-NOSSE-NEXT: pushl $.L.str -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length48_eq_const: -; X86-SSE1: 
# %bb.0: -; X86-SSE1-NEXT: pushl $48 -; X86-SSE1-NEXT: pushl $.L.str -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: setne %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length48_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1 -; X86-SSE2-NEXT: movdqu 32(%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pand %xmm1, %xmm0 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 -; X86-SSE2-NEXT: pand %xmm0, %xmm2 -; X86-SSE2-NEXT: pmovmskb %xmm2, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length48_eq_const: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movdqu (%eax), %xmm0 -; X86-SSE41-NEXT: movdqu 16(%eax), %xmm1 -; X86-SSE41-NEXT: movdqu 32(%eax), %xmm2 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE41-NEXT: por %xmm1, %xmm0 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 -; X86-SSE41-NEXT: por %xmm0, %xmm2 -; X86-SSE41-NEXT: ptest %xmm2, %xmm2 -; X86-SSE41-NEXT: setne %al -; X86-SSE41-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 48) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length63(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length63: -; X86: # %bb.0: -; X86-NEXT: pushl $63 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 63) nounwind - ret i32 %m -} - -define i1 @length63_eq(ptr %x, ptr %y) nounwind { -; X86-NOSSE-LABEL: length63_eq: -; X86-NOSSE: # %bb.0: -; 
X86-NOSSE-NEXT: pushl $63 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length63_eq: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $63 -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: setne %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length63_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: movdqu 32(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu 32(%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm2 -; X86-SSE2-NEXT: movdqu 47(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu 47(%eax), %xmm3 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm3 -; X86-SSE2-NEXT: pand %xmm2, %xmm3 -; X86-SSE2-NEXT: pand %xmm0, %xmm3 -; X86-SSE2-NEXT: pmovmskb %xmm3, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length63_eq: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu (%eax), %xmm2 -; X86-SSE41-NEXT: pxor %xmm0, %xmm2 -; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE41-NEXT: pxor %xmm1, %xmm0 -; X86-SSE41-NEXT: por %xmm2, %xmm0 -; X86-SSE41-NEXT: movdqu 32(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu 32(%eax), %xmm2 -; X86-SSE41-NEXT: pxor 
%xmm1, %xmm2 -; X86-SSE41-NEXT: movdqu 47(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu 47(%eax), %xmm3 -; X86-SSE41-NEXT: pxor %xmm1, %xmm3 -; X86-SSE41-NEXT: por %xmm2, %xmm3 -; X86-SSE41-NEXT: por %xmm0, %xmm3 -; X86-SSE41-NEXT: ptest %xmm3, %xmm3 -; X86-SSE41-NEXT: setne %al -; X86-SSE41-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length63_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length63_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $63 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length63_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length63_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $63 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length63_eq_const(ptr %X) nounwind { -; X86-NOSSE-LABEL: length63_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $63 -; X86-NOSSE-NEXT: pushl $.L.str -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length63_eq_const: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $63 -; X86-SSE1-NEXT: pushl $.L.str -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: sete %al -; X86-SSE1-NEXT: retl -; -; 
X86-SSE2-LABEL: length63_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1 -; X86-SSE2-NEXT: movdqu 32(%eax), %xmm2 -; X86-SSE2-NEXT: movdqu 47(%eax), %xmm3 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 -; X86-SSE2-NEXT: pand %xmm3, %xmm2 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pand %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length63_eq_const: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movdqu (%eax), %xmm0 -; X86-SSE41-NEXT: movdqu 16(%eax), %xmm1 -; X86-SSE41-NEXT: movdqu 32(%eax), %xmm2 -; X86-SSE41-NEXT: movdqu 47(%eax), %xmm3 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 -; X86-SSE41-NEXT: por %xmm3, %xmm2 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE41-NEXT: por %xmm1, %xmm0 -; X86-SSE41-NEXT: por %xmm2, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: sete %al -; X86-SSE41-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 63) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length64(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length64: -; X86: # %bb.0: -; X86-NEXT: pushl $64 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind - ret i32 %m -} - -define i1 @length64_eq(ptr %x, ptr %y) nounwind { -; X86-NOSSE-LABEL: length64_eq: -; X86-NOSSE: # %bb.0: -; 
X86-NOSSE-NEXT: pushl $64 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length64_eq: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $64 -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: setne %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length64_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: movdqu 32(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu 32(%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm2 -; X86-SSE2-NEXT: movdqu 48(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu 48(%eax), %xmm3 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm3 -; X86-SSE2-NEXT: pand %xmm2, %xmm3 -; X86-SSE2-NEXT: pand %xmm0, %xmm3 -; X86-SSE2-NEXT: pmovmskb %xmm3, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length64_eq: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu (%eax), %xmm2 -; X86-SSE41-NEXT: pxor %xmm0, %xmm2 -; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE41-NEXT: pxor %xmm1, %xmm0 -; X86-SSE41-NEXT: por %xmm2, %xmm0 -; X86-SSE41-NEXT: movdqu 32(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu 32(%eax), %xmm2 -; X86-SSE41-NEXT: pxor 
%xmm1, %xmm2 -; X86-SSE41-NEXT: movdqu 48(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu 48(%eax), %xmm3 -; X86-SSE41-NEXT: pxor %xmm1, %xmm3 -; X86-SSE41-NEXT: por %xmm2, %xmm3 -; X86-SSE41-NEXT: por %xmm0, %xmm3 -; X86-SSE41-NEXT: ptest %xmm3, %xmm3 -; X86-SSE41-NEXT: setne %al -; X86-SSE41-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length64_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $64 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length64_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $64 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_eq_const(ptr %X) nounwind { -; X86-NOSSE-LABEL: length64_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $64 -; X86-NOSSE-NEXT: pushl $.L.str -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length64_eq_const: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $64 -; X86-SSE1-NEXT: pushl $.L.str -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: sete %al -; X86-SSE1-NEXT: retl -; -; 
X86-SSE2-LABEL: length64_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1 -; X86-SSE2-NEXT: movdqu 32(%eax), %xmm2 -; X86-SSE2-NEXT: movdqu 48(%eax), %xmm3 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 -; X86-SSE2-NEXT: pand %xmm3, %xmm2 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pand %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length64_eq_const: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movdqu (%eax), %xmm0 -; X86-SSE41-NEXT: movdqu 16(%eax), %xmm1 -; X86-SSE41-NEXT: movdqu 32(%eax), %xmm2 -; X86-SSE41-NEXT: movdqu 48(%eax), %xmm3 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 -; X86-SSE41-NEXT: por %xmm3, %xmm2 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE41-NEXT: por %xmm1, %xmm0 -; X86-SSE41-NEXT: por %xmm2, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: sete %al -; X86-SSE41-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length96(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length96: -; X86: # %bb.0: -; X86-NEXT: pushl $96 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 96) nounwind - ret i32 %m -} - -define i1 @length96_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length96_eq: -; X86: # %bb.0: -; X86-NEXT: 
pushl $96 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length96_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length96_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $96 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length96_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length96_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $96 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length96_eq_const(ptr %X) nounwind { -; X86-LABEL: length96_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $96 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 96) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length127(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length127: -; X86: # %bb.0: -; X86-NEXT: pushl $127 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 127) nounwind - ret i32 %m -} - -define i1 @length127_eq(ptr %x, 
ptr %y) nounwind { -; X86-LABEL: length127_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $127 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length127_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length127_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $127 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length127_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length127_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $127 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length127_eq_const(ptr %X) nounwind { -; X86-LABEL: length127_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $127 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 127) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length128(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length128: -; X86: # %bb.0: -; X86-NEXT: pushl $128 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 
@memcmp(ptr %X, ptr %Y, i32 128) nounwind - ret i32 %m -} - -define i1 @length128_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length128_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $128 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length128_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length128_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $128 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length128_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length128_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $128 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length128_eq_const(ptr %X) nounwind { -; X86-LABEL: length128_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $128 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 128) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length192(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length192: -; X86: # %bb.0: -; X86-NEXT: pushl $192 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; 
X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 192) nounwind - ret i32 %m -} - -define i1 @length192_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length192_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $192 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length192_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length192_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $192 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length192_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length192_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $192 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length192_eq_const(ptr %X) nounwind { -; X86-LABEL: length192_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $192 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 192) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length255(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length255: -; X86: # %bb.0: -; 
X86-NEXT: pushl $255 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 255) nounwind - ret i32 %m -} - -define i1 @length255_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length255_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $255 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length255_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length255_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $255 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length255_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length255_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $255 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length255_eq_const(ptr %X) nounwind { -; X86-LABEL: length255_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $255 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 255) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - 
-define i32 @length256(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length256: -; X86: # %bb.0: -; X86-NEXT: pushl $256 # imm = 0x100 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 256) nounwind - ret i32 %m -} - -define i1 @length256_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length256_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $256 # imm = 0x100 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length256_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length256_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $256 # imm = 0x100 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length256_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length256_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $256 # imm = 0x100 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length256_eq_const(ptr %X) nounwind { -; X86-LABEL: length256_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $256 # imm = 0x100 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl 
%eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 256) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length384(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length384: -; X86: # %bb.0: -; X86-NEXT: pushl $384 # imm = 0x180 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 384) nounwind - ret i32 %m -} - -define i1 @length384_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length384_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $384 # imm = 0x180 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length384_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length384_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $384 # imm = 0x180 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length384_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length384_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $384 # imm = 0x180 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length384_eq_const(ptr %X) nounwind { -; X86-LABEL: length384_eq_const: -; X86: # %bb.0: -; X86-NEXT: 
pushl $384 # imm = 0x180 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 384) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length511(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length511: -; X86: # %bb.0: -; X86-NEXT: pushl $511 # imm = 0x1FF -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 511) nounwind - ret i32 %m -} - -define i1 @length511_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length511_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $511 # imm = 0x1FF -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length511_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length511_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $511 # imm = 0x1FF -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length511_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length511_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $511 # imm = 0x1FF -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind - 
%cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length511_eq_const(ptr %X) nounwind { -; X86-LABEL: length511_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $511 # imm = 0x1FF -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 511) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length512(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length512: -; X86: # %bb.0: -; X86-NEXT: pushl $512 # imm = 0x200 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 512) nounwind - ret i32 %m -} - -define i1 @length512_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length512_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $512 # imm = 0x200 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length512_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length512_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $512 # imm = 0x200 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length512_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length512_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $512 # imm = 0x200 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; 
X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length512_eq_const(ptr %X) nounwind { -; X86-LABEL: length512_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $512 # imm = 0x200 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 512) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; This checks that we do not do stupid things with huge sizes. -define i32 @huge_length(ptr %X, ptr %Y) nounwind { -; X86-LABEL: huge_length: -; X86: # %bb.0: -; X86-NEXT: pushl $-1 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9223372036854775807) nounwind - ret i32 %m -} - -define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: huge_length_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $-1 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9223372036854775807) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; This checks non-constant sizes. 
-define i32 @nonconst_length(ptr %X, ptr %Y, i32 %size) nounwind { -; X86-LABEL: nonconst_length: -; X86: # %bb.0: -; X86-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 %size) nounwind - ret i32 %m -} - -define i1 @nonconst_length_eq(ptr %X, ptr %Y, i32 %size) nounwind { -; X86-LABEL: nonconst_length_eq: -; X86: # %bb.0: -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 %size) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll deleted file mode 100644 index da46ea4065579..0000000000000 --- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll +++ /dev/null @@ -1,4006 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; NOTE: This is a copy of llvm/test/CodeGen/X86/memcmp.ll with more load pairs. Please keep it that way. 
-; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE41 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512BW -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,-prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,-prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512F -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,+prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-MIC-AVX,X64-MIC-AVX2 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,+prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-MIC-AVX,X64-MIC-AVX512F - -; This tests codegen time inlining/optimization of memcmp -; 
rdar://6480398 - -@.str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1 - -declare dso_local i32 @memcmp(ptr, ptr, i64) - -define i32 @length0(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length0: -; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind - ret i32 %m - } - -define i1 @length0_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length0_eq: -; X64: # %bb.0: -; X64-NEXT: movb $1, %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length0_lt(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length0_lt: -; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i32 @length2(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length2: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - ret i32 %m -} - -define i1 @length2_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length2_eq: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: cmpw (%rsi), %ax -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - %c = icmp eq i32 
%m, 0 - ret i1 %c -} - -define i1 @length2_lt(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length2_lt: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i1 @length2_gt(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length2_gt: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - %c = icmp sgt i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_const(ptr %X) nounwind { -; X64-LABEL: length2_eq_const: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: cmpl $12849, %eax # imm = 0x3231 -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length2_eq_nobuiltin_attr: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $2, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length3(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length3: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %ecx -; X64-NEXT: movzwl (%rsi), %edx -; X64-NEXT: rolw $8, %cx -; X64-NEXT: rolw $8, %dx -; 
X64-NEXT: cmpw %dx, %cx -; X64-NEXT: jne .LBB9_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 2(%rdi), %eax -; X64-NEXT: movzbl 2(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB9_3: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpw %dx, %cx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind - ret i32 %m -} - -define i1 @length3_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length3_eq: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: xorw (%rsi), %ax -; X64-NEXT: movzbl 2(%rdi), %ecx -; X64-NEXT: xorb 2(%rsi), %cl -; X64-NEXT: movzbl %cl, %ecx -; X64-NEXT: orw %ax, %cx -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length4(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length4: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: seta %al -; X64-NEXT: sbbl $0, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - ret i32 %m -} - -define i1 @length4_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length4_eq: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: cmpl (%rsi), %eax -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length4_lt(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length4_lt: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx -; X64-NEXT: bswapl %eax -; X64-NEXT: bswapl %ecx -; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: setb %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i1 @length4_gt(ptr %X, ptr %Y) nounwind { -; 
X64-LABEL: length4_gt: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx -; X64-NEXT: bswapl %eax -; X64-NEXT: bswapl %ecx -; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: seta %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - %c = icmp sgt i32 %m, 0 - ret i1 %c -} - -define i1 @length4_eq_const(ptr %X) nounwind { -; X64-LABEL: length4_eq_const: -; X64: # %bb.0: -; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231 -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length5(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length5: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: jne .LBB16_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB16_3: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind - ret i32 %m -} - -define i1 @length5_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length5_eq: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: xorl (%rsi), %eax -; X64-NEXT: movzbl 4(%rdi), %ecx -; X64-NEXT: xorb 4(%rsi), %cl -; X64-NEXT: movzbl %cl, %ecx -; X64-NEXT: orl %eax, %ecx -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length5_lt(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length5_lt: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: 
jne .LBB18_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq -; X64-NEXT: .LBB18_3: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i32 @length7(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length7: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: jne .LBB19_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 3(%rdi), %ecx -; X64-NEXT: movl 3(%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: je .LBB19_3 -; X64-NEXT: .LBB19_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB19_3: # %endblock -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind - ret i32 %m -} - -define i1 @length7_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length7_eq: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl 3(%rdi), %ecx -; X64-NEXT: xorl (%rsi), %eax -; X64-NEXT: xorl 3(%rsi), %ecx -; X64-NEXT: orl %eax, %ecx -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length7_lt(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length7_lt: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: jne 
.LBB21_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 3(%rdi), %ecx -; X64-NEXT: movl 3(%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: je .LBB21_3 -; X64-NEXT: .LBB21_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB21_3: # %endblock -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i32 @length8(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length8: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: seta %al -; X64-NEXT: sbbl $0, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind - ret i32 %m -} - -define i1 @length8_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length8_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: cmpq (%rsi), %rax -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length8_eq_const(ptr %X) nounwind { -; X64-LABEL: length8_eq_const: -; X64: # %bb.0: -; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130 -; X64-NEXT: cmpq %rax, (%rdi) -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length9_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length9_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: movzbl 8(%rdi), %ecx -; X64-NEXT: xorb 8(%rsi), %cl -; X64-NEXT: movzbl %cl, %ecx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: sete %al -; X64-NEXT: retq - %m 
= tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length10_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length10_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: movzwl 8(%rdi), %ecx -; X64-NEXT: xorw 8(%rsi), %cx -; X64-NEXT: movzwl %cx, %ecx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length11_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length11_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq 3(%rdi), %rcx -; X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: xorq 3(%rsi), %rcx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length12_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length12_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: xorl 8(%rsi), %ecx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length12(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length12: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB29_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: movl 8(%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB29_3 -; X64-NEXT: .LBB29_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB29_3: # %endblock -; X64-NEXT: retq - 
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind - ret i32 %m -} - -define i1 @length13_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length13_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq 5(%rdi), %rcx -; X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: xorq 5(%rsi), %rcx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 13) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length14_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length14_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq 6(%rdi), %rcx -; X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: xorq 6(%rsi), %rcx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 14) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length15_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length15_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq 7(%rdi), %rcx -; X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: xorq 7(%rsi), %rcx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 - -define i32 @length16(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length16: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB33_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB33_3 -; X64-NEXT: .LBB33_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB33_3: # %endblock -; X64-NEXT: 
retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind - ret i32 %m -} - -define i1 @length16_eq(ptr %x, ptr %y) nounwind { -; X64-SSE2-LABEL: length16_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; X64-SSE2-NEXT: pmovmskb %xmm1, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length16_eq: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu (%rsi), %xmm1 -; X64-SSE41-NEXT: pxor %xmm0, %xmm1 -; X64-SSE41-NEXT: ptest %xmm1, %xmm1 -; X64-SSE41-NEXT: setne %al -; X64-SSE41-NEXT: retq -; -; X64-AVX-LABEL: length16_eq: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: setne %al -; X64-AVX-NEXT: retq -; -; X64-MIC-AVX-LABEL: length16_eq: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm1 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 -; X64-MIC-AVX-NEXT: kortestw %k0, %k0 -; X64-MIC-AVX-NEXT: setne %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length16_lt: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB35_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB35_3 -; X64-NEXT: .LBB35_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; 
X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB35_3: # %endblock -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length16_gt: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq (%rsi), %rcx -; X64-NEXT: bswapq %rax -; X64-NEXT: bswapq %rcx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB36_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rax -; X64-NEXT: movq 8(%rsi), %rcx -; X64-NEXT: bswapq %rax -; X64-NEXT: bswapq %rcx -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB36_3 -; X64-NEXT: .LBB36_2: # %res_block -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: sbbl %edx, %edx -; X64-NEXT: orl $1, %edx -; X64-NEXT: .LBB36_3: # %endblock -; X64-NEXT: testl %edx, %edx -; X64-NEXT: setg %al -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_eq_const(ptr %X) nounwind { -; X64-SSE2-LABEL: length16_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length16_eq_const: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: sete %al -; X64-SSE41-NEXT: retq -; -; X64-AVX-LABEL: length16_eq_const: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: sete %al -; X64-AVX-NEXT: retq -; -; 
X64-MIC-AVX-LABEL: length16_eq_const: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 -; X64-MIC-AVX-NEXT: kortestw %k0, %k0 -; X64-MIC-AVX-NEXT: sete %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 - -define i32 @length24(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length24: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB38_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB38_3 -; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB38_4 -; X64-NEXT: .LBB38_3: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB38_4: # %endblock -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind - ret i32 %m -} - -define i1 @length24_eq(ptr %x, ptr %y) nounwind { -; X64-SSE2-LABEL: length24_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; X64-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X64-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X64-SSE2-NEXT: pand %xmm1, %xmm2 -; X64-SSE2-NEXT: pmovmskb %xmm2, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; 
X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length24_eq: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu (%rsi), %xmm1 -; X64-SSE41-NEXT: pxor %xmm0, %xmm1 -; X64-SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X64-SSE41-NEXT: movq {{.*#+}} xmm2 = mem[0],zero -; X64-SSE41-NEXT: pxor %xmm0, %xmm2 -; X64-SSE41-NEXT: por %xmm1, %xmm2 -; X64-SSE41-NEXT: ptest %xmm2, %xmm2 -; X64-SSE41-NEXT: sete %al -; X64-SSE41-NEXT: retq -; -; X64-AVX-LABEL: length24_eq: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: sete %al -; X64-AVX-NEXT: retq -; -; X64-MIC-AVX-LABEL: length24_eq: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm1 -; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm2, %k0 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: sete %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length24_lt: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB40_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB40_3 -; X64-NEXT: # %bb.2: # %loadbb2 -; 
X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB40_4 -; X64-NEXT: .LBB40_3: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB40_4: # %endblock -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length24_gt: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq (%rsi), %rcx -; X64-NEXT: bswapq %rax -; X64-NEXT: bswapq %rcx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB41_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rax -; X64-NEXT: movq 8(%rsi), %rcx -; X64-NEXT: bswapq %rax -; X64-NEXT: bswapq %rcx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB41_3 -; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rax -; X64-NEXT: movq 16(%rsi), %rcx -; X64-NEXT: bswapq %rax -; X64-NEXT: bswapq %rcx -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB41_4 -; X64-NEXT: .LBB41_3: # %res_block -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: sbbl %edx, %edx -; X64-NEXT: orl $1, %edx -; X64-NEXT: .LBB41_4: # %endblock -; X64-NEXT: testl %edx, %edx -; X64-NEXT: setg %al -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_eq_const(ptr %X) nounwind { -; X64-SSE2-LABEL: length24_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; 
X64-SSE2-NEXT: pand %xmm1, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length24_eq_const: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE41-NEXT: por %xmm1, %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: setne %al -; X64-SSE41-NEXT: retq -; -; X64-AVX-LABEL: length24_eq_const: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: setne %al -; X64-AVX-NEXT: retq -; -; X64-MIC-AVX-LABEL: length24_eq_const: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm2 = [959985462,858927408,0,0] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 -; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: setne %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length31(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length31: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB43_4 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 
8(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB43_4 -; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB43_4 -; X64-NEXT: # %bb.3: # %loadbb3 -; X64-NEXT: movq 23(%rdi), %rcx -; X64-NEXT: movq 23(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB43_5 -; X64-NEXT: .LBB43_4: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB43_5: # %endblock -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 31) nounwind - ret i32 %m -} - -define i1 @length31_eq(ptr %x, ptr %y) nounwind { -; X64-SSE2-LABEL: length31_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X64-SSE2-NEXT: movdqu 15(%rsi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm2, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length31_eq: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1 -; X64-SSE41-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE41-NEXT: pxor %xmm0, %xmm2 -; X64-SSE41-NEXT: movdqu 15(%rsi), %xmm0 -; X64-SSE41-NEXT: pxor %xmm1, %xmm0 -; X64-SSE41-NEXT: por %xmm2, %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: sete %al -; X64-SSE41-NEXT: retq -; -; X64-AVX-LABEL: length31_eq: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 -; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1 -; X64-AVX-NEXT: vpxor (%rsi), %xmm0, 
%xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: sete %al -; X64-AVX-NEXT: retq -; -; X64-MIC-AVX-LABEL: length31_eq: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 -; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2 -; X64-MIC-AVX-NEXT: vmovdqu 15(%rsi), %xmm3 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: sete %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length31_lt: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB45_4 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB45_4 -; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB45_4 -; X64-NEXT: # %bb.3: # %loadbb3 -; X64-NEXT: movq 23(%rdi), %rcx -; X64-NEXT: movq 23(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB45_5 -; X64-NEXT: .LBB45_4: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB45_5: # %endblock -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 
%cmp -} - -define i1 @length31_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length31_gt: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq (%rsi), %rcx -; X64-NEXT: bswapq %rax -; X64-NEXT: bswapq %rcx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB46_4 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rax -; X64-NEXT: movq 8(%rsi), %rcx -; X64-NEXT: bswapq %rax -; X64-NEXT: bswapq %rcx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB46_4 -; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rax -; X64-NEXT: movq 16(%rsi), %rcx -; X64-NEXT: bswapq %rax -; X64-NEXT: bswapq %rcx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB46_4 -; X64-NEXT: # %bb.3: # %loadbb3 -; X64-NEXT: movq 23(%rdi), %rax -; X64-NEXT: movq 23(%rsi), %rcx -; X64-NEXT: bswapq %rax -; X64-NEXT: bswapq %rcx -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB46_5 -; X64-NEXT: .LBB46_4: # %res_block -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: sbbl %edx, %edx -; X64-NEXT: orl $1, %edx -; X64-NEXT: .LBB46_5: # %endblock -; X64-NEXT: testl %edx, %edx -; X64-NEXT: setg %al -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { -; X64-SSE2-LABEL: length31_eq_prefer128: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X64-SSE2-NEXT: movdqu 15(%rsi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm2, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length31_eq_prefer128: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1 -; 
X64-SSE41-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE41-NEXT: pxor %xmm0, %xmm2 -; X64-SSE41-NEXT: movdqu 15(%rsi), %xmm0 -; X64-SSE41-NEXT: pxor %xmm1, %xmm0 -; X64-SSE41-NEXT: por %xmm2, %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: sete %al -; X64-SSE41-NEXT: retq -; -; X64-AVX-LABEL: length31_eq_prefer128: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 -; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1 -; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: sete %al -; X64-AVX-NEXT: retq -; -; X64-MIC-AVX-LABEL: length31_eq_prefer128: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 -; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2 -; X64-MIC-AVX-NEXT: vmovdqu 15(%rsi), %xmm3 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: sete %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_eq_const(ptr %X) nounwind { -; X64-SSE2-LABEL: length31_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: pand %xmm1, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length31_eq_const: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; 
X64-SSE41-NEXT: por %xmm1, %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: setne %al -; X64-SSE41-NEXT: retq -; -; X64-AVX-LABEL: length31_eq_const: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 -; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: setne %al -; X64-AVX-NEXT: retq -; -; X64-MIC-AVX-LABEL: length31_eq_const: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 -; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [943142453,842084409,909456435,809056311] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 -; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: setne %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 31) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length32(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length32: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB49_4 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB49_4 -; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB49_4 -; X64-NEXT: # %bb.3: # %loadbb3 -; X64-NEXT: movq 24(%rdi), %rcx -; X64-NEXT: movq 24(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq 
%rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB49_5 -; X64-NEXT: .LBB49_4: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB49_5: # %endblock -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind - ret i32 %m -} - -; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 - -define i1 @length32_eq(ptr %x, ptr %y) nounwind { -; X64-SSE2-LABEL: length32_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm2, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length32_eq: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE41-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE41-NEXT: pxor %xmm0, %xmm2 -; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0 -; X64-SSE41-NEXT: pxor %xmm1, %xmm0 -; X64-SSE41-NEXT: por %xmm2, %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: sete %al -; X64-SSE41-NEXT: retq -; -; X64-AVX1-LABEL: length32_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length32_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512-LABEL: length32_eq: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 -; 
X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX512-NEXT: vptest %ymm0, %ymm0 -; X64-AVX512-NEXT: sete %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq -; -; X64-MIC-AVX-LABEL: length32_eq: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm1 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 -; X64-MIC-AVX-NEXT: kortestw %k0, %k0 -; X64-MIC-AVX-NEXT: sete %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length32_lt: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB51_4 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB51_4 -; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB51_4 -; X64-NEXT: # %bb.3: # %loadbb3 -; X64-NEXT: movq 24(%rdi), %rcx -; X64-NEXT: movq 24(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB51_5 -; X64-NEXT: .LBB51_4: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB51_5: # %endblock -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length32_gt: -; X64: # %bb.0: 
-; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq (%rsi), %rcx -; X64-NEXT: bswapq %rax -; X64-NEXT: bswapq %rcx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB52_4 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rax -; X64-NEXT: movq 8(%rsi), %rcx -; X64-NEXT: bswapq %rax -; X64-NEXT: bswapq %rcx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB52_4 -; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rax -; X64-NEXT: movq 16(%rsi), %rcx -; X64-NEXT: bswapq %rax -; X64-NEXT: bswapq %rcx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB52_4 -; X64-NEXT: # %bb.3: # %loadbb3 -; X64-NEXT: movq 24(%rdi), %rax -; X64-NEXT: movq 24(%rsi), %rcx -; X64-NEXT: bswapq %rax -; X64-NEXT: bswapq %rcx -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB52_5 -; X64-NEXT: .LBB52_4: # %res_block -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: sbbl %edx, %edx -; X64-NEXT: orl $1, %edx -; X64-NEXT: .LBB52_5: # %endblock -; X64-NEXT: testl %edx, %edx -; X64-NEXT: setg %al -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { -; X64-SSE2-LABEL: length32_eq_prefer128: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm2, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length32_eq_prefer128: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE41-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE41-NEXT: pxor %xmm0, %xmm2 -; X64-SSE41-NEXT: movdqu 
16(%rsi), %xmm0 -; X64-SSE41-NEXT: pxor %xmm1, %xmm0 -; X64-SSE41-NEXT: por %xmm2, %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: sete %al -; X64-SSE41-NEXT: retq -; -; X64-AVX-LABEL: length32_eq_prefer128: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovdqu 16(%rdi), %xmm1 -; X64-AVX-NEXT: vpxor 16(%rsi), %xmm1, %xmm1 -; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: sete %al -; X64-AVX-NEXT: retq -; -; X64-MIC-AVX-LABEL: length32_eq_prefer128: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-MIC-AVX-NEXT: vmovdqu 16(%rdi), %xmm1 -; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2 -; X64-MIC-AVX-NEXT: vmovdqu 16(%rsi), %xmm3 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: sete %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_eq_const(ptr %X) nounwind { -; X64-SSE2-LABEL: length32_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: pand %xmm1, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length32_eq_const: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE41-NEXT: por %xmm1, %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: setne %al -; 
X64-SSE41-NEXT: retq -; -; X64-AVX1-LABEL: length32_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length32_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512-LABEL: length32_eq_const: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX512-NEXT: vptest %ymm0, %ymm0 -; X64-AVX512-NEXT: setne %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq -; -; X64-MIC-AVX-LABEL: length32_eq_const: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 -; X64-MIC-AVX-NEXT: kortestw %k0, %k0 -; X64-MIC-AVX-NEXT: setne %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length48(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length48: -; X64: # %bb.0: -; X64-NEXT: movl $48, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 48) nounwind - ret i32 %m -} - -define i1 @length48_eq(ptr %x, ptr %y) nounwind { -; X64-SSE2-LABEL: length48_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE2-NEXT: movdqu 32(%rdi), %xmm2 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm3 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm3 -; X64-SSE2-NEXT: movdqu 
16(%rsi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm3, %xmm0 -; X64-SSE2-NEXT: movdqu 32(%rsi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb %xmm2, %xmm1 -; X64-SSE2-NEXT: pand %xmm0, %xmm1 -; X64-SSE2-NEXT: pmovmskb %xmm1, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length48_eq: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE41-NEXT: movdqu 32(%rdi), %xmm2 -; X64-SSE41-NEXT: movdqu (%rsi), %xmm3 -; X64-SSE41-NEXT: pxor %xmm0, %xmm3 -; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0 -; X64-SSE41-NEXT: pxor %xmm1, %xmm0 -; X64-SSE41-NEXT: por %xmm3, %xmm0 -; X64-SSE41-NEXT: movdqu 32(%rsi), %xmm1 -; X64-SSE41-NEXT: pxor %xmm2, %xmm1 -; X64-SSE41-NEXT: por %xmm0, %xmm1 -; X64-SSE41-NEXT: ptest %xmm1, %xmm1 -; X64-SSE41-NEXT: sete %al -; X64-SSE41-NEXT: retq -; -; X64-AVX1-LABEL: length48_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %xmm1 -; X64-AVX1-NEXT: vmovups 32(%rsi), %xmm2 -; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0 -; X64-AVX1-NEXT: vxorps %ymm2, %ymm1, %ymm1 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length48_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %xmm1 -; X64-AVX2-NEXT: vmovdqu 32(%rsi), %xmm2 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512-LABEL: length48_eq: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1 -; X64-AVX512-NEXT: vmovdqu 32(%rsi), %xmm2 -; 
X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX512-NEXT: vptest %ymm0, %ymm0 -; X64-AVX512-NEXT: sete %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq -; -; X64-MIC-AVX-LABEL: length48_eq: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm1 -; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm2 -; X64-MIC-AVX-NEXT: vmovdqu 32(%rsi), %xmm3 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm2, %k0 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: sete %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length48_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $48, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length48_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $48, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { -; X64-SSE2-LABEL: length48_eq_prefer128: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE2-NEXT: movdqu 32(%rdi), %xmm2 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm3 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm3 -; X64-SSE2-NEXT: 
movdqu 16(%rsi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm3, %xmm0 -; X64-SSE2-NEXT: movdqu 32(%rsi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb %xmm2, %xmm1 -; X64-SSE2-NEXT: pand %xmm0, %xmm1 -; X64-SSE2-NEXT: pmovmskb %xmm1, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length48_eq_prefer128: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE41-NEXT: movdqu 32(%rdi), %xmm2 -; X64-SSE41-NEXT: movdqu (%rsi), %xmm3 -; X64-SSE41-NEXT: pxor %xmm0, %xmm3 -; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0 -; X64-SSE41-NEXT: pxor %xmm1, %xmm0 -; X64-SSE41-NEXT: por %xmm3, %xmm0 -; X64-SSE41-NEXT: movdqu 32(%rsi), %xmm1 -; X64-SSE41-NEXT: pxor %xmm2, %xmm1 -; X64-SSE41-NEXT: por %xmm0, %xmm1 -; X64-SSE41-NEXT: ptest %xmm1, %xmm1 -; X64-SSE41-NEXT: sete %al -; X64-SSE41-NEXT: retq -; -; X64-AVX-LABEL: length48_eq_prefer128: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovdqu 16(%rdi), %xmm1 -; X64-AVX-NEXT: vmovdqu 32(%rdi), %xmm2 -; X64-AVX-NEXT: vpxor 16(%rsi), %xmm1, %xmm1 -; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vpxor 32(%rsi), %xmm2, %xmm1 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: sete %al -; X64-AVX-NEXT: retq -; -; X64-MIC-AVX-LABEL: length48_eq_prefer128: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-MIC-AVX-NEXT: vmovdqu 16(%rdi), %xmm1 -; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm2 -; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm3 -; X64-MIC-AVX-NEXT: vmovdqu 16(%rsi), %xmm4 -; X64-MIC-AVX-NEXT: vmovdqu 32(%rsi), %xmm5 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm4, %zmm1, %k0 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm0, %k1 -; X64-MIC-AVX-NEXT: korw %k0, %k1, %k0 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm5, %zmm2, %k1 -; X64-MIC-AVX-NEXT: kortestw %k1, %k0 
-; X64-MIC-AVX-NEXT: sete %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_eq_const(ptr %X) nounwind { -; X64-SSE2-LABEL: length48_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE2-NEXT: movdqu 32(%rdi), %xmm2 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: pand %xmm1, %xmm0 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; X64-SSE2-NEXT: pand %xmm0, %xmm2 -; X64-SSE2-NEXT: pmovmskb %xmm2, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length48_eq_const: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE41-NEXT: movdqu 32(%rdi), %xmm2 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE41-NEXT: por %xmm1, %xmm0 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; X64-SSE41-NEXT: por %xmm0, %xmm2 -; X64-SSE41-NEXT: ptest %xmm2, %xmm2 -; X64-SSE41-NEXT: setne %al -; X64-SSE41-NEXT: retq -; -; X64-AVX1-LABEL: length48_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %xmm1 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length48_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %xmm1 -; X64-AVX2-NEXT: vpxor 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512-LABEL: length48_eq_const: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1 -; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX512-NEXT: vptest %ymm0, %ymm0 -; X64-AVX512-NEXT: setne %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq -; -; X64-MIC-AVX-LABEL: length48_eq_const: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm1 -; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [892613426,959985462,858927408,926299444,0,0,0,0] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 -; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: setne %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 48) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length63(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length63: -; X64: # %bb.0: -; X64-NEXT: movl $63, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 63) nounwind - ret i32 %m -} - -define i1 @length63_eq(ptr %x, ptr %y) nounwind { -; X64-SSE2-LABEL: length63_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE2-NEXT: movdqu 32(%rdi), %xmm2 -; X64-SSE2-NEXT: movdqu 47(%rdi), %xmm3 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm4 -; 
X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm4 -; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm4, %xmm0 -; X64-SSE2-NEXT: movdqu 32(%rsi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb %xmm2, %xmm1 -; X64-SSE2-NEXT: movdqu 47(%rsi), %xmm2 -; X64-SSE2-NEXT: pcmpeqb %xmm3, %xmm2 -; X64-SSE2-NEXT: pand %xmm1, %xmm2 -; X64-SSE2-NEXT: pand %xmm0, %xmm2 -; X64-SSE2-NEXT: pmovmskb %xmm2, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length63_eq: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE41-NEXT: movdqu 32(%rdi), %xmm2 -; X64-SSE41-NEXT: movdqu 47(%rdi), %xmm3 -; X64-SSE41-NEXT: movdqu (%rsi), %xmm4 -; X64-SSE41-NEXT: pxor %xmm0, %xmm4 -; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0 -; X64-SSE41-NEXT: pxor %xmm1, %xmm0 -; X64-SSE41-NEXT: por %xmm4, %xmm0 -; X64-SSE41-NEXT: movdqu 32(%rsi), %xmm1 -; X64-SSE41-NEXT: pxor %xmm2, %xmm1 -; X64-SSE41-NEXT: movdqu 47(%rsi), %xmm2 -; X64-SSE41-NEXT: pxor %xmm3, %xmm2 -; X64-SSE41-NEXT: por %xmm1, %xmm2 -; X64-SSE41-NEXT: por %xmm0, %xmm2 -; X64-SSE41-NEXT: ptest %xmm2, %xmm2 -; X64-SSE41-NEXT: setne %al -; X64-SSE41-NEXT: retq -; -; X64-AVX1-LABEL: length63_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 31(%rdi), %ymm1 -; X64-AVX1-NEXT: vxorps 31(%rsi), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length63_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 31(%rdi), %ymm1 -; X64-AVX2-NEXT: vpxor 31(%rsi), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al 
-; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512-LABEL: length63_eq: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1 -; X64-AVX512-NEXT: vpxor 31(%rsi), %ymm1, %ymm1 -; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX512-NEXT: vptest %ymm0, %ymm0 -; X64-AVX512-NEXT: setne %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq -; -; X64-MIC-AVX-LABEL: length63_eq: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX-NEXT: vmovdqu 31(%rdi), %ymm1 -; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm2 -; X64-MIC-AVX-NEXT: vmovdqu 31(%rsi), %ymm3 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: setne %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length63_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length63_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $63, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length63_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length63_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $63, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length63_eq_const(ptr %X) nounwind { -; X64-SSE2-LABEL: length63_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; 
X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE2-NEXT: movdqu 32(%rdi), %xmm2 -; X64-SSE2-NEXT: movdqu 47(%rdi), %xmm3 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; X64-SSE2-NEXT: pand %xmm3, %xmm2 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: pand %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm2, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length63_eq_const: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE41-NEXT: movdqu 32(%rdi), %xmm2 -; X64-SSE41-NEXT: movdqu 47(%rdi), %xmm3 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; X64-SSE41-NEXT: por %xmm3, %xmm2 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE41-NEXT: por %xmm1, %xmm0 -; X64-SSE41-NEXT: por %xmm2, %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: sete %al -; X64-SSE41-NEXT: retq -; -; X64-AVX1-LABEL: length63_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 31(%rdi), %ymm1 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length63_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 31(%rdi), %ymm1 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512-LABEL: length63_eq_const: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1 -; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX512-NEXT: vptest %ymm0, %ymm0 -; X64-AVX512-NEXT: sete %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq -; -; X64-MIC-AVX-LABEL: length63_eq_const: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX-NEXT: vmovdqu 31(%rdi), %ymm1 -; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [875770417,943142453,842084409,909456435,809056311,875770417,943142453,842084409] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 -; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: sete %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 63) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length64(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length64: -; X64: # %bb.0: -; X64-NEXT: movl $64, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind - ret i32 %m -} - -define i1 @length64_eq(ptr %x, ptr %y) nounwind { -; X64-SSE2-LABEL: length64_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE2-NEXT: movdqu 32(%rdi), %xmm2 -; X64-SSE2-NEXT: movdqu 48(%rdi), %xmm3 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm4 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm4 -; 
X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm4, %xmm0 -; X64-SSE2-NEXT: movdqu 32(%rsi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb %xmm2, %xmm1 -; X64-SSE2-NEXT: movdqu 48(%rsi), %xmm2 -; X64-SSE2-NEXT: pcmpeqb %xmm3, %xmm2 -; X64-SSE2-NEXT: pand %xmm1, %xmm2 -; X64-SSE2-NEXT: pand %xmm0, %xmm2 -; X64-SSE2-NEXT: pmovmskb %xmm2, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length64_eq: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE41-NEXT: movdqu 32(%rdi), %xmm2 -; X64-SSE41-NEXT: movdqu 48(%rdi), %xmm3 -; X64-SSE41-NEXT: movdqu (%rsi), %xmm4 -; X64-SSE41-NEXT: pxor %xmm0, %xmm4 -; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0 -; X64-SSE41-NEXT: pxor %xmm1, %xmm0 -; X64-SSE41-NEXT: por %xmm4, %xmm0 -; X64-SSE41-NEXT: movdqu 32(%rsi), %xmm1 -; X64-SSE41-NEXT: pxor %xmm2, %xmm1 -; X64-SSE41-NEXT: movdqu 48(%rsi), %xmm2 -; X64-SSE41-NEXT: pxor %xmm3, %xmm2 -; X64-SSE41-NEXT: por %xmm1, %xmm2 -; X64-SSE41-NEXT: por %xmm0, %xmm2 -; X64-SSE41-NEXT: ptest %xmm2, %xmm2 -; X64-SSE41-NEXT: setne %al -; X64-SSE41-NEXT: retq -; -; X64-AVX1-LABEL: length64_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1 -; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length64_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: vzeroupper -; 
X64-AVX2-NEXT: retq -; -; X64-AVX512-LABEL: length64_eq: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512-NEXT: vpcmpneqd (%rsi), %zmm0, %k0 -; X64-AVX512-NEXT: kortestw %k0, %k0 -; X64-AVX512-NEXT: setne %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length64_eq: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-MIC-AVX2-NEXT: vmovdqu (%rsi), %ymm2 -; X64-MIC-AVX2-NEXT: vmovdqu 32(%rsi), %ymm3 -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 -; X64-MIC-AVX2-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX2-NEXT: setne %al -; X64-MIC-AVX2-NEXT: vzeroupper -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length64_eq: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k0 -; X64-MIC-AVX512F-NEXT: kortestw %k0, %k0 -; X64-MIC-AVX512F-NEXT: setne %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length64_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $64, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length64_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $64, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - 
-define i1 @length64_eq_const(ptr %X) nounwind { -; X64-SSE2-LABEL: length64_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE2-NEXT: movdqu 32(%rdi), %xmm2 -; X64-SSE2-NEXT: movdqu 48(%rdi), %xmm3 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; X64-SSE2-NEXT: pand %xmm3, %xmm2 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: pand %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm2, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length64_eq_const: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE41-NEXT: movdqu 32(%rdi), %xmm2 -; X64-SSE41-NEXT: movdqu 48(%rdi), %xmm3 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; X64-SSE41-NEXT: por %xmm3, %xmm2 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE41-NEXT: por %xmm1, %xmm0 -; X64-SSE41-NEXT: por %xmm2, %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: sete %al -; X64-SSE41-NEXT: retq -; -; X64-AVX1-LABEL: length64_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length64_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), 
%ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512-LABEL: length64_eq_const: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0 -; X64-AVX512-NEXT: kortestw %k0, %k0 -; X64-AVX512-NEXT: sete %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length64_eq_const: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [892613426,959985462,858927408,926299444,825243960,892613426,959985462,858927408] -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 -; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960] -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; X64-MIC-AVX2-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX2-NEXT: sete %al -; X64-MIC-AVX2-NEXT: vzeroupper -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length64_eq_const: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0 -; X64-MIC-AVX512F-NEXT: kortestw %k0, %k0 -; X64-MIC-AVX512F-NEXT: sete %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length96(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length96: -; X64: # %bb.0: -; X64-NEXT: movl $96, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 96) nounwind - ret i32 %m -} - -define i1 @length96_eq(ptr %x, ptr %y) 
nounwind { -; X64-SSE-LABEL: length96_eq: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $96, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: setne %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length96_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1 -; X64-AVX1-NEXT: vmovups 64(%rdi), %ymm2 -; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vxorps 64(%rsi), %ymm2, %ymm1 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length96_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2 -; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vpxor 64(%rsi), %ymm2, %ymm1 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length96_eq: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vmovdqu 64(%rdi), %ymm1 -; X64-AVX512BW-NEXT: vmovdqu 64(%rsi), %ymm2 -; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb %zmm2, %zmm1, %k1 -; X64-AVX512BW-NEXT: kortestq %k1, %k0 -; X64-AVX512BW-NEXT: setne %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: length96_eq: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1 -; X64-AVX512F-NEXT: vmovdqu 64(%rsi), %ymm2 -; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k0 
-; X64-AVX512F-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 -; X64-AVX512F-NEXT: kortestw %k1, %k0 -; X64-AVX512F-NEXT: setne %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length96_eq: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-MIC-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2 -; X64-MIC-AVX2-NEXT: vmovdqu (%rsi), %ymm3 -; X64-MIC-AVX2-NEXT: vmovdqu 32(%rsi), %ymm4 -; X64-MIC-AVX2-NEXT: vmovdqu 64(%rsi), %ymm5 -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm4, %zmm1, %k0 -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm3, %zmm0, %k1 -; X64-MIC-AVX2-NEXT: korw %k0, %k1, %k0 -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm5, %zmm2, %k1 -; X64-MIC-AVX2-NEXT: kortestw %k1, %k0 -; X64-MIC-AVX2-NEXT: setne %al -; X64-MIC-AVX2-NEXT: vzeroupper -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length96_eq: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1 -; X64-MIC-AVX512F-NEXT: vmovdqu 64(%rsi), %ymm2 -; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k1, %k0 -; X64-MIC-AVX512F-NEXT: setne %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length96_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length96_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $96, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length96_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length96_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $96, %edx -; X64-NEXT: callq 
memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length96_eq_const(ptr %X) nounwind { -; X64-SSE-LABEL: length96_eq_const: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $.L.str, %esi -; X64-SSE-NEXT: movl $96, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: sete %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length96_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1 -; X64-AVX1-NEXT: vmovups 64(%rdi), %ymm2 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm1 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length96_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm1 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length96_eq_const: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vmovdqu 64(%rdi), %ymm1 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k0 -; X64-AVX512BW-NEXT: 
vpcmpneqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %k1 -; X64-AVX512BW-NEXT: kortestq %k1, %k0 -; X64-AVX512BW-NEXT: sete %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: length96_eq_const: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1 -; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0 -; X64-AVX512F-NEXT: vpcmpneqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %k1 -; X64-AVX512F-NEXT: kortestw %k1, %k0 -; X64-AVX512F-NEXT: sete %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length96_eq_const: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-MIC-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2 -; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [892613426,959985462,858927408,926299444,825243960,892613426,959985462,858927408] -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 -; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960] -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; X64-MIC-AVX2-NEXT: korw %k0, %k1, %k0 -; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [926299444,825243960,892613426,959985462,858927408,926299444,825243960,892613426] -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm0, %zmm2, %k1 -; X64-MIC-AVX2-NEXT: kortestw %k1, %k0 -; X64-MIC-AVX2-NEXT: sete %al -; X64-MIC-AVX2-NEXT: vzeroupper -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length96_eq_const: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k1, %k0 -; X64-MIC-AVX512F-NEXT: sete %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %m = tail call i32 
@memcmp(ptr %X, ptr @.str, i64 96) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length127(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length127: -; X64: # %bb.0: -; X64-NEXT: movl $127, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 127) nounwind - ret i32 %m -} - -define i1 @length127_eq(ptr %x, ptr %y) nounwind { -; X64-SSE-LABEL: length127_eq: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $127, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: setne %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length127_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1 -; X64-AVX1-NEXT: vmovups 64(%rdi), %ymm2 -; X64-AVX1-NEXT: vmovups 95(%rdi), %ymm3 -; X64-AVX1-NEXT: vxorps 95(%rsi), %ymm3, %ymm3 -; X64-AVX1-NEXT: vxorps 64(%rsi), %ymm2, %ymm2 -; X64-AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2 -; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length127_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2 -; X64-AVX2-NEXT: vmovdqu 95(%rdi), %ymm3 -; X64-AVX2-NEXT: vpxor 95(%rsi), %ymm3, %ymm3 -; X64-AVX2-NEXT: vpxor 64(%rsi), %ymm2, %ymm2 -; X64-AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2 -; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length127_eq: -; 
X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vmovdqu64 63(%rdi), %zmm1 -; X64-AVX512BW-NEXT: vpcmpneqb 63(%rsi), %zmm1, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k1 -; X64-AVX512BW-NEXT: kortestq %k0, %k1 -; X64-AVX512BW-NEXT: setne %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: length127_eq: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1 -; X64-AVX512F-NEXT: vpcmpneqd 63(%rsi), %zmm1, %k0 -; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1 -; X64-AVX512F-NEXT: kortestw %k0, %k1 -; X64-AVX512F-NEXT: setne %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length127_eq: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-MIC-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2 -; X64-MIC-AVX2-NEXT: vmovdqu 95(%rdi), %ymm3 -; X64-MIC-AVX2-NEXT: vmovdqu (%rsi), %ymm4 -; X64-MIC-AVX2-NEXT: vmovdqu 32(%rsi), %ymm5 -; X64-MIC-AVX2-NEXT: vmovdqu 64(%rsi), %ymm6 -; X64-MIC-AVX2-NEXT: vmovdqu 95(%rsi), %ymm7 -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm7, %zmm3, %k0 -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm6, %zmm2, %k1 -; X64-MIC-AVX2-NEXT: korw %k0, %k1, %k0 -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm5, %zmm1, %k1 -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm4, %zmm0, %k2 -; X64-MIC-AVX2-NEXT: korw %k1, %k2, %k1 -; X64-MIC-AVX2-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX2-NEXT: setne %al -; X64-MIC-AVX2-NEXT: vzeroupper -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length127_eq: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1 -; X64-MIC-AVX512F-NEXT: vpcmpneqd 63(%rsi), %zmm1, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX512F-NEXT: setne %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; 
X64-MIC-AVX512F-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length127_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length127_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $127, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length127_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length127_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $127, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length127_eq_const(ptr %X) nounwind { -; X64-SSE-LABEL: length127_eq_const: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $.L.str, %esi -; X64-SSE-NEXT: movl $127, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: sete %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length127_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1 -; X64-AVX1-NEXT: vmovups 64(%rdi), %ymm2 -; X64-AVX1-NEXT: vmovups 95(%rdi), %ymm3 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; X64-AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: sete %al -; 
X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length127_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2 -; X64-AVX2-NEXT: vmovdqu 95(%rdi), %ymm3 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; X64-AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length127_eq_const: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vmovdqu64 63(%rdi), %zmm1 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str+63(%rip), %zmm1, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k1 -; X64-AVX512BW-NEXT: kortestq %k0, %k1 -; X64-AVX512BW-NEXT: sete %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: length127_eq_const: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1 -; X64-AVX512F-NEXT: vpcmpneqd .L.str+63(%rip), %zmm1, %k0 -; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1 -; X64-AVX512F-NEXT: kortestw %k0, %k1 -; X64-AVX512F-NEXT: sete %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length127_eq_const: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-MIC-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2 -; X64-MIC-AVX2-NEXT: vmovdqu 95(%rdi), %ymm3 -; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = 
[943142453,842084409,909456435,809056311,875770417,943142453,842084409,909456435] -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm4, %zmm3, %k0 -; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [926299444,825243960,892613426,959985462,858927408,926299444,825243960,892613426] -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 -; X64-MIC-AVX2-NEXT: korw %k0, %k1, %k0 -; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [892613426,959985462,858927408,926299444,825243960,892613426,959985462,858927408] -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 -; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960] -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm1, %zmm0, %k2 -; X64-MIC-AVX2-NEXT: korw %k1, %k2, %k1 -; X64-MIC-AVX2-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX2-NEXT: sete %al -; X64-MIC-AVX2-NEXT: vzeroupper -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length127_eq_const: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+63(%rip), %zmm1, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX512F-NEXT: sete %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 127) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length128(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length128: -; X64: # %bb.0: -; X64-NEXT: movl $128, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 128) nounwind - ret i32 %m -} - -define i1 @length128_eq(ptr %x, ptr %y) nounwind { -; X64-SSE-LABEL: length128_eq: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $128, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: setne %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: 
length128_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1 -; X64-AVX1-NEXT: vmovups 64(%rdi), %ymm2 -; X64-AVX1-NEXT: vmovups 96(%rdi), %ymm3 -; X64-AVX1-NEXT: vxorps 96(%rsi), %ymm3, %ymm3 -; X64-AVX1-NEXT: vxorps 64(%rsi), %ymm2, %ymm2 -; X64-AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2 -; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length128_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2 -; X64-AVX2-NEXT: vmovdqu 96(%rdi), %ymm3 -; X64-AVX2-NEXT: vpxor 96(%rsi), %ymm3, %ymm3 -; X64-AVX2-NEXT: vpxor 64(%rsi), %ymm2, %ymm2 -; X64-AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2 -; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length128_eq: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512BW-NEXT: vpcmpneqb 64(%rsi), %zmm1, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k1 -; X64-AVX512BW-NEXT: kortestq %k0, %k1 -; X64-AVX512BW-NEXT: setne %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: length128_eq: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k0 -; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1 -; X64-AVX512F-NEXT: kortestw %k0, %k1 -; 
X64-AVX512F-NEXT: setne %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length128_eq: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-MIC-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2 -; X64-MIC-AVX2-NEXT: vmovdqu 96(%rdi), %ymm3 -; X64-MIC-AVX2-NEXT: vmovdqu (%rsi), %ymm4 -; X64-MIC-AVX2-NEXT: vmovdqu 32(%rsi), %ymm5 -; X64-MIC-AVX2-NEXT: vmovdqu 64(%rsi), %ymm6 -; X64-MIC-AVX2-NEXT: vmovdqu 96(%rsi), %ymm7 -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm7, %zmm3, %k0 -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm6, %zmm2, %k1 -; X64-MIC-AVX2-NEXT: korw %k0, %k1, %k0 -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm5, %zmm1, %k1 -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm4, %zmm0, %k2 -; X64-MIC-AVX2-NEXT: korw %k1, %k2, %k1 -; X64-MIC-AVX2-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX2-NEXT: setne %al -; X64-MIC-AVX2-NEXT: vzeroupper -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length128_eq: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-MIC-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX512F-NEXT: setne %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length128_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length128_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $128, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length128_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length128_gt: -; X64: # %bb.0: -; X64-NEXT: pushq 
%rax -; X64-NEXT: movl $128, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length128_eq_const(ptr %X) nounwind { -; X64-SSE-LABEL: length128_eq_const: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $.L.str, %esi -; X64-SSE-NEXT: movl $128, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: sete %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length128_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1 -; X64-AVX1-NEXT: vmovups 64(%rdi), %ymm2 -; X64-AVX1-NEXT: vmovups 96(%rdi), %ymm3 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; X64-AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length128_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2 -; X64-AVX2-NEXT: vmovdqu 96(%rdi), %ymm3 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; X64-AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm2, 
%ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length128_eq_const: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str+64(%rip), %zmm1, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k1 -; X64-AVX512BW-NEXT: kortestq %k0, %k1 -; X64-AVX512BW-NEXT: sete %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: length128_eq_const: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k0 -; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1 -; X64-AVX512F-NEXT: kortestw %k0, %k1 -; X64-AVX512F-NEXT: sete %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length128_eq_const: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-MIC-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2 -; X64-MIC-AVX2-NEXT: vmovdqu 96(%rdi), %ymm3 -; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [959985462,858927408,926299444,825243960,892613426,959985462,858927408,926299444] -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm4, %zmm3, %k0 -; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [926299444,825243960,892613426,959985462,858927408,926299444,825243960,892613426] -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 -; X64-MIC-AVX2-NEXT: korw %k0, %k1, %k0 -; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [892613426,959985462,858927408,926299444,825243960,892613426,959985462,858927408] -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 -; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960] -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm1, %zmm0, %k2 -; X64-MIC-AVX2-NEXT: korw %k1, %k2, %k1 -; 
X64-MIC-AVX2-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX2-NEXT: sete %al -; X64-MIC-AVX2-NEXT: vzeroupper -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length128_eq_const: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX512F-NEXT: sete %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 128) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length192(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length192: -; X64: # %bb.0: -; X64-NEXT: movl $192, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 192) nounwind - ret i32 %m -} - -define i1 @length192_eq(ptr %x, ptr %y) nounwind { -; X64-SSE-LABEL: length192_eq: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $192, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: setne %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length192_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: pushq %rax -; X64-AVX1-NEXT: movl $192, %edx -; X64-AVX1-NEXT: callq memcmp -; X64-AVX1-NEXT: testl %eax, %eax -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: popq %rcx -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length192_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: pushq %rax -; X64-AVX2-NEXT: movl $192, %edx -; X64-AVX2-NEXT: callq memcmp -; X64-AVX2-NEXT: testl %eax, %eax -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: popq %rcx -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length192_eq: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512BW-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-AVX512BW-NEXT: vpcmpneqb 
64(%rsi), %zmm1, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k1 -; X64-AVX512BW-NEXT: korq %k0, %k1, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb 128(%rsi), %zmm2, %k1 -; X64-AVX512BW-NEXT: kortestq %k1, %k0 -; X64-AVX512BW-NEXT: setne %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: length192_eq: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k0 -; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1 -; X64-AVX512F-NEXT: korw %k0, %k1, %k0 -; X64-AVX512F-NEXT: vpcmpneqd 128(%rsi), %zmm2, %k1 -; X64-AVX512F-NEXT: kortestw %k1, %k0 -; X64-AVX512F-NEXT: setne %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length192_eq: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: pushq %rax -; X64-MIC-AVX2-NEXT: movl $192, %edx -; X64-MIC-AVX2-NEXT: callq memcmp -; X64-MIC-AVX2-NEXT: testl %eax, %eax -; X64-MIC-AVX2-NEXT: setne %al -; X64-MIC-AVX2-NEXT: popq %rcx -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length192_eq: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-MIC-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-MIC-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1 -; X64-MIC-AVX512F-NEXT: korw %k0, %k1, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd 128(%rsi), %zmm2, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k1, %k0 -; X64-MIC-AVX512F-NEXT: setne %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length192_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length192_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $192, %edx -; X64-NEXT: callq memcmp 
-; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length192_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length192_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $192, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length192_eq_const(ptr %X) nounwind { -; X64-SSE-LABEL: length192_eq_const: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $.L.str, %esi -; X64-SSE-NEXT: movl $192, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: sete %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length192_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: pushq %rax -; X64-AVX1-NEXT: movl $.L.str, %esi -; X64-AVX1-NEXT: movl $192, %edx -; X64-AVX1-NEXT: callq memcmp -; X64-AVX1-NEXT: testl %eax, %eax -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: popq %rcx -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length192_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: pushq %rax -; X64-AVX2-NEXT: movl $.L.str, %esi -; X64-AVX2-NEXT: movl $192, %edx -; X64-AVX2-NEXT: callq memcmp -; X64-AVX2-NEXT: testl %eax, %eax -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: popq %rcx -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length192_eq_const: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512BW-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str+64(%rip), %zmm1, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k1 -; X64-AVX512BW-NEXT: korq %k0, %k1, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb 
.L.str+128(%rip), %zmm2, %k1 -; X64-AVX512BW-NEXT: kortestq %k1, %k0 -; X64-AVX512BW-NEXT: sete %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: length192_eq_const: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k0 -; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1 -; X64-AVX512F-NEXT: korw %k0, %k1, %k0 -; X64-AVX512F-NEXT: vpcmpneqd .L.str+128(%rip), %zmm2, %k1 -; X64-AVX512F-NEXT: kortestw %k1, %k0 -; X64-AVX512F-NEXT: sete %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length192_eq_const: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: pushq %rax -; X64-MIC-AVX2-NEXT: movl $.L.str, %esi -; X64-MIC-AVX2-NEXT: movl $192, %edx -; X64-MIC-AVX2-NEXT: callq memcmp -; X64-MIC-AVX2-NEXT: testl %eax, %eax -; X64-MIC-AVX2-NEXT: sete %al -; X64-MIC-AVX2-NEXT: popq %rcx -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length192_eq_const: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-MIC-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1 -; X64-MIC-AVX512F-NEXT: korw %k0, %k1, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+128(%rip), %zmm2, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k1, %k0 -; X64-MIC-AVX512F-NEXT: sete %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 192) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length255(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length255: -; X64: # %bb.0: -; X64-NEXT: movl $255, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 255) nounwind - ret 
i32 %m -} - -define i1 @length255_eq(ptr %x, ptr %y) nounwind { -; X64-SSE-LABEL: length255_eq: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $255, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: setne %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length255_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: pushq %rax -; X64-AVX1-NEXT: movl $255, %edx -; X64-AVX1-NEXT: callq memcmp -; X64-AVX1-NEXT: testl %eax, %eax -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: popq %rcx -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length255_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: pushq %rax -; X64-AVX2-NEXT: movl $255, %edx -; X64-AVX2-NEXT: callq memcmp -; X64-AVX2-NEXT: testl %eax, %eax -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: popq %rcx -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length255_eq: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512BW-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-AVX512BW-NEXT: vmovdqu64 191(%rdi), %zmm3 -; X64-AVX512BW-NEXT: vpcmpneqb 191(%rsi), %zmm3, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb 128(%rsi), %zmm2, %k1 -; X64-AVX512BW-NEXT: korq %k0, %k1, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb 64(%rsi), %zmm1, %k1 -; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k2 -; X64-AVX512BW-NEXT: korq %k1, %k2, %k1 -; X64-AVX512BW-NEXT: kortestq %k0, %k1 -; X64-AVX512BW-NEXT: setne %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: length255_eq: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-AVX512F-NEXT: vmovdqu64 191(%rdi), %zmm3 -; X64-AVX512F-NEXT: vpcmpneqd 191(%rsi), %zmm3, %k0 -; X64-AVX512F-NEXT: vpcmpneqd 128(%rsi), %zmm2, %k1 -; X64-AVX512F-NEXT: korw %k0, %k1, %k0 -; X64-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k1 -; 
X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k2 -; X64-AVX512F-NEXT: korw %k1, %k2, %k1 -; X64-AVX512F-NEXT: kortestw %k0, %k1 -; X64-AVX512F-NEXT: setne %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length255_eq: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: pushq %rax -; X64-MIC-AVX2-NEXT: movl $255, %edx -; X64-MIC-AVX2-NEXT: callq memcmp -; X64-MIC-AVX2-NEXT: testl %eax, %eax -; X64-MIC-AVX2-NEXT: setne %al -; X64-MIC-AVX2-NEXT: popq %rcx -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length255_eq: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-MIC-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-MIC-AVX512F-NEXT: vmovdqu64 191(%rdi), %zmm3 -; X64-MIC-AVX512F-NEXT: vpcmpneqd 191(%rsi), %zmm3, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd 128(%rsi), %zmm2, %k1 -; X64-MIC-AVX512F-NEXT: korw %k0, %k1, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k1 -; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k2 -; X64-MIC-AVX512F-NEXT: korw %k1, %k2, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX512F-NEXT: setne %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length255_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length255_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $255, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length255_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length255_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $255, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: 
setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length255_eq_const(ptr %X) nounwind { -; X64-SSE-LABEL: length255_eq_const: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $.L.str, %esi -; X64-SSE-NEXT: movl $255, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: sete %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length255_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: pushq %rax -; X64-AVX1-NEXT: movl $.L.str, %esi -; X64-AVX1-NEXT: movl $255, %edx -; X64-AVX1-NEXT: callq memcmp -; X64-AVX1-NEXT: testl %eax, %eax -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: popq %rcx -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length255_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: pushq %rax -; X64-AVX2-NEXT: movl $.L.str, %esi -; X64-AVX2-NEXT: movl $255, %edx -; X64-AVX2-NEXT: callq memcmp -; X64-AVX2-NEXT: testl %eax, %eax -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: popq %rcx -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length255_eq_const: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512BW-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-AVX512BW-NEXT: vmovdqu64 191(%rdi), %zmm3 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str+191(%rip), %zmm3, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str+128(%rip), %zmm2, %k1 -; X64-AVX512BW-NEXT: korq %k0, %k1, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str+64(%rip), %zmm1, %k1 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k2 -; X64-AVX512BW-NEXT: korq %k1, %k2, %k1 -; X64-AVX512BW-NEXT: kortestq %k0, %k1 -; X64-AVX512BW-NEXT: sete %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: length255_eq_const: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), 
%zmm1 -; X64-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-AVX512F-NEXT: vmovdqu64 191(%rdi), %zmm3 -; X64-AVX512F-NEXT: vpcmpneqd .L.str+191(%rip), %zmm3, %k0 -; X64-AVX512F-NEXT: vpcmpneqd .L.str+128(%rip), %zmm2, %k1 -; X64-AVX512F-NEXT: korw %k0, %k1, %k0 -; X64-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k1 -; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k2 -; X64-AVX512F-NEXT: korw %k1, %k2, %k1 -; X64-AVX512F-NEXT: kortestw %k0, %k1 -; X64-AVX512F-NEXT: sete %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length255_eq_const: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: pushq %rax -; X64-MIC-AVX2-NEXT: movl $.L.str, %esi -; X64-MIC-AVX2-NEXT: movl $255, %edx -; X64-MIC-AVX2-NEXT: callq memcmp -; X64-MIC-AVX2-NEXT: testl %eax, %eax -; X64-MIC-AVX2-NEXT: sete %al -; X64-MIC-AVX2-NEXT: popq %rcx -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length255_eq_const: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-MIC-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-MIC-AVX512F-NEXT: vmovdqu64 191(%rdi), %zmm3 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+191(%rip), %zmm3, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+128(%rip), %zmm2, %k1 -; X64-MIC-AVX512F-NEXT: korw %k0, %k1, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k1 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k2 -; X64-MIC-AVX512F-NEXT: korw %k1, %k2, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX512F-NEXT: sete %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 255) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length256(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length256: -; X64: # %bb.0: -; X64-NEXT: movl $256, %edx # imm = 0x100 -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 256) nounwind - 
ret i32 %m -} - -define i1 @length256_eq(ptr %x, ptr %y) nounwind { -; X64-SSE-LABEL: length256_eq: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $256, %edx # imm = 0x100 -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: setne %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length256_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: pushq %rax -; X64-AVX1-NEXT: movl $256, %edx # imm = 0x100 -; X64-AVX1-NEXT: callq memcmp -; X64-AVX1-NEXT: testl %eax, %eax -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: popq %rcx -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length256_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: pushq %rax -; X64-AVX2-NEXT: movl $256, %edx # imm = 0x100 -; X64-AVX2-NEXT: callq memcmp -; X64-AVX2-NEXT: testl %eax, %eax -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: popq %rcx -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length256_eq: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512BW-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-AVX512BW-NEXT: vmovdqu64 192(%rdi), %zmm3 -; X64-AVX512BW-NEXT: vpcmpneqb 192(%rsi), %zmm3, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb 128(%rsi), %zmm2, %k1 -; X64-AVX512BW-NEXT: korq %k0, %k1, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb 64(%rsi), %zmm1, %k1 -; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k2 -; X64-AVX512BW-NEXT: korq %k1, %k2, %k1 -; X64-AVX512BW-NEXT: kortestq %k0, %k1 -; X64-AVX512BW-NEXT: setne %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: length256_eq: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-AVX512F-NEXT: vmovdqu64 192(%rdi), %zmm3 -; X64-AVX512F-NEXT: vpcmpneqd 192(%rsi), %zmm3, %k0 -; X64-AVX512F-NEXT: vpcmpneqd 128(%rsi), %zmm2, %k1 -; X64-AVX512F-NEXT: korw %k0, %k1, %k0 -; X64-AVX512F-NEXT: 
vpcmpneqd 64(%rsi), %zmm1, %k1 -; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k2 -; X64-AVX512F-NEXT: korw %k1, %k2, %k1 -; X64-AVX512F-NEXT: kortestw %k0, %k1 -; X64-AVX512F-NEXT: setne %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length256_eq: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: pushq %rax -; X64-MIC-AVX2-NEXT: movl $256, %edx # imm = 0x100 -; X64-MIC-AVX2-NEXT: callq memcmp -; X64-MIC-AVX2-NEXT: testl %eax, %eax -; X64-MIC-AVX2-NEXT: setne %al -; X64-MIC-AVX2-NEXT: popq %rcx -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length256_eq: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-MIC-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-MIC-AVX512F-NEXT: vmovdqu64 192(%rdi), %zmm3 -; X64-MIC-AVX512F-NEXT: vpcmpneqd 192(%rsi), %zmm3, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd 128(%rsi), %zmm2, %k1 -; X64-MIC-AVX512F-NEXT: korw %k0, %k1, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k1 -; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k2 -; X64-MIC-AVX512F-NEXT: korw %k1, %k2, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX512F-NEXT: setne %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length256_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length256_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $256, %edx # imm = 0x100 -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length256_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length256_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $256, %edx # imm = 
0x100 -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length256_eq_const(ptr %X) nounwind { -; X64-SSE-LABEL: length256_eq_const: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $.L.str, %esi -; X64-SSE-NEXT: movl $256, %edx # imm = 0x100 -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: sete %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length256_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: pushq %rax -; X64-AVX1-NEXT: movl $.L.str, %esi -; X64-AVX1-NEXT: movl $256, %edx # imm = 0x100 -; X64-AVX1-NEXT: callq memcmp -; X64-AVX1-NEXT: testl %eax, %eax -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: popq %rcx -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length256_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: pushq %rax -; X64-AVX2-NEXT: movl $.L.str, %esi -; X64-AVX2-NEXT: movl $256, %edx # imm = 0x100 -; X64-AVX2-NEXT: callq memcmp -; X64-AVX2-NEXT: testl %eax, %eax -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: popq %rcx -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length256_eq_const: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512BW-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-AVX512BW-NEXT: vmovdqu64 192(%rdi), %zmm3 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str+192(%rip), %zmm3, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str+128(%rip), %zmm2, %k1 -; X64-AVX512BW-NEXT: korq %k0, %k1, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str+64(%rip), %zmm1, %k1 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k2 -; X64-AVX512BW-NEXT: korq %k1, %k2, %k1 -; X64-AVX512BW-NEXT: kortestq %k0, %k1 -; X64-AVX512BW-NEXT: sete %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: 
length256_eq_const: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-AVX512F-NEXT: vmovdqu64 192(%rdi), %zmm3 -; X64-AVX512F-NEXT: vpcmpneqd .L.str+192(%rip), %zmm3, %k0 -; X64-AVX512F-NEXT: vpcmpneqd .L.str+128(%rip), %zmm2, %k1 -; X64-AVX512F-NEXT: korw %k0, %k1, %k0 -; X64-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k1 -; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k2 -; X64-AVX512F-NEXT: korw %k1, %k2, %k1 -; X64-AVX512F-NEXT: kortestw %k0, %k1 -; X64-AVX512F-NEXT: sete %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length256_eq_const: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: pushq %rax -; X64-MIC-AVX2-NEXT: movl $.L.str, %esi -; X64-MIC-AVX2-NEXT: movl $256, %edx # imm = 0x100 -; X64-MIC-AVX2-NEXT: callq memcmp -; X64-MIC-AVX2-NEXT: testl %eax, %eax -; X64-MIC-AVX2-NEXT: sete %al -; X64-MIC-AVX2-NEXT: popq %rcx -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length256_eq_const: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-MIC-AVX512F-NEXT: vmovdqu64 128(%rdi), %zmm2 -; X64-MIC-AVX512F-NEXT: vmovdqu64 192(%rdi), %zmm3 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+192(%rip), %zmm3, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+128(%rip), %zmm2, %k1 -; X64-MIC-AVX512F-NEXT: korw %k0, %k1, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k1 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k2 -; X64-MIC-AVX512F-NEXT: korw %k1, %k2, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX512F-NEXT: sete %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 256) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length384(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length384: -; X64: # 
%bb.0: -; X64-NEXT: movl $384, %edx # imm = 0x180 -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 384) nounwind - ret i32 %m -} - -define i1 @length384_eq(ptr %x, ptr %y) nounwind { -; X64-LABEL: length384_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $384, %edx # imm = 0x180 -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setne %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length384_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length384_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $384, %edx # imm = 0x180 -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length384_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length384_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $384, %edx # imm = 0x180 -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length384_eq_const(ptr %X) nounwind { -; X64-LABEL: length384_eq_const: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $.L.str, %esi -; X64-NEXT: movl $384, %edx # imm = 0x180 -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 384) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length511(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length511: -; X64: # %bb.0: -; X64-NEXT: movl $511, %edx # imm = 0x1FF -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail 
call i32 @memcmp(ptr %X, ptr %Y, i64 511) nounwind - ret i32 %m -} - -define i1 @length511_eq(ptr %x, ptr %y) nounwind { -; X64-LABEL: length511_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $511, %edx # imm = 0x1FF -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setne %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length511_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length511_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $511, %edx # imm = 0x1FF -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length511_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length511_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $511, %edx # imm = 0x1FF -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length511_eq_const(ptr %X) nounwind { -; X64-LABEL: length511_eq_const: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $.L.str, %esi -; X64-NEXT: movl $511, %edx # imm = 0x1FF -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 511) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length512(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length512: -; X64: # %bb.0: -; X64-NEXT: movl $512, %edx # imm = 0x200 -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 512) nounwind - ret i32 %m -} - -define i1 
@length512_eq(ptr %x, ptr %y) nounwind { -; X64-LABEL: length512_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $512, %edx # imm = 0x200 -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setne %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length512_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length512_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $512, %edx # imm = 0x200 -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length512_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length512_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $512, %edx # imm = 0x200 -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length512_eq_const(ptr %X) nounwind { -; X64-LABEL: length512_eq_const: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $.L.str, %esi -; X64-NEXT: movl $512, %edx # imm = 0x200 -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 512) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; This checks that we do not do stupid things with huge sizes. 
-define i32 @huge_length(ptr %X, ptr %Y) nounwind { -; X64-LABEL: huge_length: -; X64: # %bb.0: -; X64-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind - ret i32 %m -} - -define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: huge_length_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; This checks non-constant sizes. -define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) nounwind { -; X64-LABEL: nonconst_length: -; X64: # %bb.0: -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind - ret i32 %m -} - -define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) nounwind { -; X64-LABEL: nonconst_length_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} diff --git a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll deleted file mode 100644 index 762691151f4bd..0000000000000 --- a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll +++ /dev/null @@ -1,583 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefix=X86 --check-prefix=X86-NOSSE -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE2 - -; This tests codegen time inlining/optimization of memcmp 
-; rdar://6480398 - -@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1 - -declare dso_local i32 @memcmp(ptr, ptr, i32) -declare dso_local i32 @bcmp(ptr, ptr, i32) - -define i32 @length2(ptr %X, ptr %Y) nounwind optsize { -; X86-LABEL: length2: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind - ret i32 %m -} - -define i1 @length2_eq(ptr %X, ptr %Y) nounwind optsize { -; X86-LABEL: length2_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: cmpw (%eax), %cx -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_const(ptr %X) nounwind optsize { -; X86-LABEL: length2_eq_const: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl (%eax), %eax -; X86-NEXT: cmpl $12849, %eax # imm = 0x3231 -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind optsize { -; X86-LABEL: length2_eq_nobuiltin_attr: -; X86: # %bb.0: -; X86-NEXT: pushl $2 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define 
i32 @length3(ptr %X, ptr %Y) nounwind optsize { -; X86-LABEL: length3: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: movzwl (%ecx), %esi -; X86-NEXT: rolw $8, %dx -; X86-NEXT: rolw $8, %si -; X86-NEXT: cmpw %si, %dx -; X86-NEXT: jne .LBB4_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 2(%eax), %eax -; X86-NEXT: movzbl 2(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB4_2 -; X86-NEXT: .LBB4_3: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpw %si, %dx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: .LBB4_2: # %endblock -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind - ret i32 %m -} - -define i1 @length3_eq(ptr %X, ptr %Y) nounwind optsize { -; X86-LABEL: length3_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %edx -; X86-NEXT: xorw (%eax), %dx -; X86-NEXT: movb 2(%ecx), %cl -; X86-NEXT: xorb 2(%eax), %cl -; X86-NEXT: movzbl %cl, %eax -; X86-NEXT: orw %dx, %ax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length4(ptr %X, ptr %Y) nounwind optsize { -; X86-LABEL: length4: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: seta %al -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind - ret i32 %m -} - -define i1 @length4_eq(ptr %X, ptr %Y) nounwind optsize { -; X86-LABEL: length4_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), 
%ecx -; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: cmpl (%eax), %ecx -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length4_eq_const(ptr %X) nounwind optsize { -; X86-LABEL: length4_eq_const: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231 -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length5(ptr %X, ptr %Y) nounwind optsize { -; X86-LABEL: length5: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: movl (%ecx), %esi -; X86-NEXT: bswapl %edx -; X86-NEXT: bswapl %esi -; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB9_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB9_2 -; X86-NEXT: .LBB9_3: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: .LBB9_2: # %endblock -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind - ret i32 %m -} - -define i1 @length5_eq(ptr %X, ptr %Y) nounwind optsize { -; X86-LABEL: length5_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: xorl (%eax), %edx -; X86-NEXT: movb 4(%ecx), %cl -; X86-NEXT: xorb 4(%eax), %cl -; X86-NEXT: movzbl %cl, %eax -; X86-NEXT: orl %edx, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length8(ptr %X, ptr %Y) nounwind 
optsize { -; X86-LABEL: length8: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB11_2 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB11_3 -; X86-NEXT: .LBB11_2: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: .LBB11_3: # %endblock -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind - ret i32 %m -} - -define i1 @length8_eq(ptr %X, ptr %Y) nounwind optsize { -; X86-LABEL: length8_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: movl 4(%ecx), %ecx -; X86-NEXT: xorl (%eax), %edx -; X86-NEXT: xorl 4(%eax), %ecx -; X86-NEXT: orl %edx, %ecx -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length8_eq_const(ptr %X) nounwind optsize { -; X86-LABEL: length8_eq_const: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $858927408, %ecx # imm = 0x33323130 -; X86-NEXT: xorl (%eax), %ecx -; X86-NEXT: movl $926299444, %edx # imm = 0x37363534 -; X86-NEXT: xorl 4(%eax), %edx -; X86-NEXT: orl %ecx, %edx -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length12_eq(ptr %X, ptr %Y) nounwind optsize { -; X86-LABEL: length12_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $12 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; 
X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length12(ptr %X, ptr %Y) nounwind optsize { -; X86-LABEL: length12: -; X86: # %bb.0: -; X86-NEXT: pushl $12 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind - ret i32 %m -} - -; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 - -define i32 @length16(ptr %X, ptr %Y) nounwind optsize { -; X86-LABEL: length16: -; X86: # %bb.0: -; X86-NEXT: pushl $16 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind - ret i32 %m -} - -define i1 @length16_eq(ptr %x, ptr %y) nounwind optsize { -; X86-NOSSE-LABEL: length16_eq: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $16 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE2-LABEL: length16_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu (%eax), %xmm1 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; X86-SSE2-NEXT: pmovmskb %xmm1, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_eq_const(ptr %X) nounwind optsize { -; X86-NOSSE-LABEL: 
length16_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $16 -; X86-NOSSE-NEXT: pushl $.L.str -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE2-LABEL: length16_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 - -define i32 @length24(ptr %X, ptr %Y) nounwind optsize { -; X86-LABEL: length24: -; X86: # %bb.0: -; X86-NEXT: pushl $24 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind - ret i32 %m -} - -define i1 @length24_eq(ptr %x, ptr %y) nounwind optsize { -; X86-NOSSE-LABEL: length24_eq: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $24 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE2-LABEL: length24_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 8(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 8(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax 
-; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_eq_const(ptr %X) nounwind optsize { -; X86-NOSSE-LABEL: length24_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $24 -; X86-NOSSE-NEXT: pushl $.L.str -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE2-LABEL: length24_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: movdqu 8(%eax), %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pand %xmm1, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length32(ptr %X, ptr %Y) nounwind optsize { -; X86-LABEL: length32: -; X86: # %bb.0: -; X86-NEXT: pushl $32 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind - ret i32 %m -} - -; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 - -define i1 @length32_eq(ptr %x, ptr %y) nounwind optsize { -; X86-NOSSE-LABEL: length32_eq: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $32 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE2-LABEL: 
length32_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_eq_const(ptr %X) nounwind optsize { -; X86-NOSSE-LABEL: length32_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $32 -; X86-NOSSE-NEXT: pushl $.L.str -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE2-LABEL: length32_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pand %xmm1, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length64(ptr %X, ptr %Y) nounwind optsize { -; X86-LABEL: length64: -; X86: # %bb.0: -; X86-NEXT: pushl $64 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind - ret i32 %m -} - -define i1 @length64_eq(ptr %x, 
ptr %y) nounwind optsize { -; X86-LABEL: length64_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $64 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_eq_const(ptr %X) nounwind optsize { -; X86-LABEL: length64_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $64 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind optsize { -; X86-LABEL: bcmp_length2: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpw (%ecx), %dx -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @bcmp(ptr %X, ptr %Y, i32 2) nounwind - ret i32 %m -} diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll deleted file mode 100644 index c0c7b98d471cd..0000000000000 --- a/llvm/test/CodeGen/X86/memcmp-optsize.ll +++ /dev/null @@ -1,596 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2 - -; This tests codegen time inlining/optimization of memcmp -; 
rdar://6480398 - -@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1 - -declare dso_local i32 @memcmp(ptr, ptr, i64) -declare dso_local i32 @bcmp(ptr, ptr, i64) - -define i32 @length2(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: length2: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - ret i32 %m -} - -define i1 @length2_eq(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: length2_eq: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: cmpw (%rsi), %ax -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_const(ptr %X) nounwind optsize { -; X64-LABEL: length2_eq_const: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: cmpl $12849, %eax # imm = 0x3231 -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: length2_eq_nobuiltin_attr: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $2, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length3(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: length3: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %ecx -; X64-NEXT: movzwl (%rsi), %edx -; X64-NEXT: rolw $8, %cx -; X64-NEXT: rolw $8, %dx -; X64-NEXT: cmpw %dx, %cx -; X64-NEXT: jne .LBB4_3 
-; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 2(%rdi), %eax -; X64-NEXT: movzbl 2(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB4_3: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpw %dx, %cx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind - ret i32 %m -} - -define i1 @length3_eq(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: length3_eq: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: xorw (%rsi), %ax -; X64-NEXT: movb 2(%rdi), %cl -; X64-NEXT: xorb 2(%rsi), %cl -; X64-NEXT: movzbl %cl, %ecx -; X64-NEXT: orw %ax, %cx -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length4(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: length4: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: seta %al -; X64-NEXT: sbbl $0, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - ret i32 %m -} - -define i1 @length4_eq(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: length4_eq: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: cmpl (%rsi), %eax -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length4_eq_const(ptr %X) nounwind optsize { -; X64-LABEL: length4_eq_const: -; X64: # %bb.0: -; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231 -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length5(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: length5: -; X64: # %bb.0: -; 
X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: jne .LBB9_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB9_3: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind - ret i32 %m -} - -define i1 @length5_eq(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: length5_eq: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: xorl (%rsi), %eax -; X64-NEXT: movb 4(%rdi), %cl -; X64-NEXT: xorb 4(%rsi), %cl -; X64-NEXT: movzbl %cl, %ecx -; X64-NEXT: orl %eax, %ecx -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length8(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: length8: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: seta %al -; X64-NEXT: sbbl $0, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind - ret i32 %m -} - -define i1 @length8_eq(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: length8_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: cmpq (%rsi), %rax -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length8_eq_const(ptr %X) nounwind optsize { -; X64-LABEL: length8_eq_const: -; X64: # %bb.0: -; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130 -; X64-NEXT: cmpq %rax, (%rdi) -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr 
@.str, i64 8) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length12_eq(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: length12_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: xorl 8(%rsi), %ecx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length12(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: length12: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB15_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: movl 8(%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB15_3 -; X64-NEXT: .LBB15_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB15_3: # %endblock -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind - ret i32 %m -} - -; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 - -define i32 @length16(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: length16: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB16_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB16_3 -; X64-NEXT: .LBB16_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB16_3: # %endblock -; X64-NEXT: retq 
- %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind - ret i32 %m -} - -define i1 @length16_eq(ptr %x, ptr %y) nounwind optsize { -; X64-SSE2-LABEL: length16_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; X64-SSE2-NEXT: pmovmskb %xmm1, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-AVX-LABEL: length16_eq: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: setne %al -; X64-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_eq_const(ptr %X) nounwind optsize { -; X64-SSE2-LABEL: length16_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-AVX-LABEL: length16_eq_const: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: sete %al -; X64-AVX-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 - -define i32 @length24(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: length24: -; X64: # %bb.0: -; X64-NEXT: movl $24, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind - ret i32 %m -} - -define i1 @length24_eq(ptr %x, ptr %y) nounwind optsize { -; X64-SSE2-LABEL: length24_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm1 
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; X64-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X64-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X64-SSE2-NEXT: pand %xmm1, %xmm2 -; X64-SSE2-NEXT: pmovmskb %xmm2, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-AVX-LABEL: length24_eq: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: sete %al -; X64-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_eq_const(ptr %X) nounwind optsize { -; X64-SSE2-LABEL: length24_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: pand %xmm1, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-AVX-LABEL: length24_eq_const: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: setne %al -; X64-AVX-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length32(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: length32: -; 
X64: # %bb.0: -; X64-NEXT: movl $32, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind - ret i32 %m -} - -; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 - -define i1 @length32_eq(ptr %x, ptr %y) nounwind optsize { -; X64-SSE2-LABEL: length32_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm2, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-AVX1-LABEL: length32_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length32_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_eq_const(ptr %X) nounwind optsize { -; X64-SSE2-LABEL: length32_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: pand %xmm1, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-AVX1-LABEL: length32_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: 
vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length32_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length64(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: length64: -; X64: # %bb.0: -; X64-NEXT: movl $64, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind - ret i32 %m -} - -define i1 @length64_eq(ptr %x, ptr %y) nounwind optsize { -; X64-SSE2-LABEL: length64_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: pushq %rax -; X64-SSE2-NEXT: movl $64, %edx -; X64-SSE2-NEXT: callq memcmp -; X64-SSE2-NEXT: testl %eax, %eax -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: popq %rcx -; X64-SSE2-NEXT: retq -; -; X64-AVX1-LABEL: length64_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1 -; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length64_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind - %cmp = icmp ne 
i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_eq_const(ptr %X) nounwind optsize { -; X64-SSE2-LABEL: length64_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: pushq %rax -; X64-SSE2-NEXT: movl $.L.str, %esi -; X64-SSE2-NEXT: movl $64, %edx -; X64-SSE2-NEXT: callq memcmp -; X64-SSE2-NEXT: testl %eax, %eax -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: popq %rcx -; X64-SSE2-NEXT: retq -; -; X64-AVX1-LABEL: length64_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length64_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind optsize { -; X64-LABEL: bcmp_length2: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %ecx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpw (%rsi), %cx -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @bcmp(ptr %X, ptr %Y, i64 2) nounwind - ret i32 %m -} diff --git a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll deleted file mode 100644 index cb45fd3ebb906..0000000000000 --- a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll +++ /dev/null @@ -1,600 +0,0 @@ -; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefix=X86 --check-prefix=X86-NOSSE -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE2 - -; This tests codegen time inlining/optimization of memcmp -; rdar://6480398 - -@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1 - -declare dso_local i32 @memcmp(ptr, ptr, i32) -declare dso_local i32 @bcmp(ptr, ptr, i32) - -define i32 @length2(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: length2: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind - ret i32 %m -} - -define i1 @length2_eq(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: length2_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: cmpw (%eax), %cx -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_const(ptr %X) nounwind !prof !14 { -; X86-LABEL: length2_eq_const: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl (%eax), %eax -; X86-NEXT: cmpl $12849, %eax # imm = 0x3231 -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: length2_eq_nobuiltin_attr: -; X86: # %bb.0: -; 
X86-NEXT: pushl $2 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length3(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: length3: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: movzwl (%ecx), %esi -; X86-NEXT: rolw $8, %dx -; X86-NEXT: rolw $8, %si -; X86-NEXT: cmpw %si, %dx -; X86-NEXT: jne .LBB4_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 2(%eax), %eax -; X86-NEXT: movzbl 2(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB4_2 -; X86-NEXT: .LBB4_3: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpw %si, %dx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: .LBB4_2: # %endblock -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind - ret i32 %m -} - -define i1 @length3_eq(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: length3_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %edx -; X86-NEXT: xorw (%eax), %dx -; X86-NEXT: movb 2(%ecx), %cl -; X86-NEXT: xorb 2(%eax), %cl -; X86-NEXT: movzbl %cl, %eax -; X86-NEXT: orw %dx, %ax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length4(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: length4: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; 
X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: seta %al -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind - ret i32 %m -} - -define i1 @length4_eq(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: length4_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: cmpl (%eax), %ecx -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length4_eq_const(ptr %X) nounwind !prof !14 { -; X86-LABEL: length4_eq_const: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231 -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length5(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: length5: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: movl (%ecx), %esi -; X86-NEXT: bswapl %edx -; X86-NEXT: bswapl %esi -; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB9_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB9_2 -; X86-NEXT: .LBB9_3: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: .LBB9_2: # %endblock -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind - ret i32 %m -} - -define i1 @length5_eq(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: length5_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl 
(%ecx), %edx -; X86-NEXT: xorl (%eax), %edx -; X86-NEXT: movb 4(%ecx), %cl -; X86-NEXT: xorb 4(%eax), %cl -; X86-NEXT: movzbl %cl, %eax -; X86-NEXT: orl %edx, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length8(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: length8: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB11_2 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB11_3 -; X86-NEXT: .LBB11_2: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: .LBB11_3: # %endblock -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind - ret i32 %m -} - -define i1 @length8_eq(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: length8_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: movl 4(%ecx), %ecx -; X86-NEXT: xorl (%eax), %edx -; X86-NEXT: xorl 4(%eax), %ecx -; X86-NEXT: orl %edx, %ecx -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length8_eq_const(ptr %X) nounwind !prof !14 { -; X86-LABEL: length8_eq_const: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $858927408, %ecx # imm = 0x33323130 -; X86-NEXT: xorl (%eax), %ecx -; X86-NEXT: movl $926299444, %edx # imm = 0x37363534 -; X86-NEXT: xorl 
4(%eax), %edx -; X86-NEXT: orl %ecx, %edx -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length12_eq(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: length12_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $12 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length12(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: length12: -; X86: # %bb.0: -; X86-NEXT: pushl $12 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind - ret i32 %m -} - -; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 - -define i32 @length16(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: length16: -; X86: # %bb.0: -; X86-NEXT: pushl $16 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind - ret i32 %m -} - -define i1 @length16_eq(ptr %x, ptr %y) nounwind !prof !14 { -; X86-NOSSE-LABEL: length16_eq: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $16 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE2-LABEL: length16_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu (%eax), %xmm1 -; 
X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; X86-SSE2-NEXT: pmovmskb %xmm1, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_eq_const(ptr %X) nounwind !prof !14 { -; X86-NOSSE-LABEL: length16_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $16 -; X86-NOSSE-NEXT: pushl $.L.str -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE2-LABEL: length16_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 - -define i32 @length24(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: length24: -; X86: # %bb.0: -; X86-NEXT: pushl $24 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind - ret i32 %m -} - -define i1 @length24_eq(ptr %x, ptr %y) nounwind !prof !14 { -; X86-NOSSE-LABEL: length24_eq: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $24 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE2-LABEL: length24_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl 
{{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 8(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 8(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_eq_const(ptr %X) nounwind !prof !14 { -; X86-NOSSE-LABEL: length24_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $24 -; X86-NOSSE-NEXT: pushl $.L.str -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE2-LABEL: length24_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: movdqu 8(%eax), %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pand %xmm1, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length32(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: length32: -; X86: # %bb.0: -; X86-NEXT: pushl $32 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind - ret i32 %m -} - -; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 - -define i1 @length32_eq(ptr 
%x, ptr %y) nounwind !prof !14 { -; X86-NOSSE-LABEL: length32_eq: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $32 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE2-LABEL: length32_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_eq_const(ptr %X) nounwind !prof !14 { -; X86-NOSSE-LABEL: length32_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $32 -; X86-NOSSE-NEXT: pushl $.L.str -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE2-LABEL: length32_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pand %xmm1, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} 
- -define i32 @length64(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: length64: -; X86: # %bb.0: -; X86-NEXT: pushl $64 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind - ret i32 %m -} - -define i1 @length64_eq(ptr %x, ptr %y) nounwind !prof !14 { -; X86-LABEL: length64_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $64 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_eq_const(ptr %X) nounwind !prof !14 { -; X86-LABEL: length64_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $64 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind !prof !14 { -; X86-LABEL: bcmp_length2: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpw (%ecx), %dx -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @bcmp(ptr %X, ptr %Y, i32 2) nounwind - ret i32 %m -} - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"ProfileSummary", !1} -!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} -!2 = !{!"ProfileFormat", !"InstrProf"} -!3 = !{!"TotalCount", i32 10000} -!4 = !{!"MaxCount", i32 10} -!5 = !{!"MaxInternalCount", i32 1} -!6 = !{!"MaxFunctionCount", i32 1000} -!7 = !{!"NumCounts", i32 3} -!8 = !{!"NumFunctions", i32 3} -!9 = !{!"DetailedSummary", !10} -!10 = 
!{!11, !12, !13} -!11 = !{i32 10000, i32 100, i32 1} -!12 = !{i32 999000, i32 100, i32 1} -!13 = !{i32 999999, i32 1, i32 2} -!14 = !{!"function_entry_count", i32 0} diff --git a/llvm/test/CodeGen/X86/memcmp-pgso.ll b/llvm/test/CodeGen/X86/memcmp-pgso.ll deleted file mode 100644 index 720344a22e43b..0000000000000 --- a/llvm/test/CodeGen/X86/memcmp-pgso.ll +++ /dev/null @@ -1,613 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2 - -; This tests codegen time inlining/optimization of memcmp -; rdar://6480398 - -@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1 - -declare dso_local i32 @memcmp(ptr, ptr, i64) -declare dso_local i32 @bcmp(ptr, ptr, i64) - -define i32 @length2(ptr %X, ptr %Y) nounwind !prof !14 { -; X64-LABEL: length2: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - ret i32 %m -} - -define i1 @length2_eq(ptr %X, ptr %Y) nounwind !prof !14 { -; X64-LABEL: length2_eq: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: cmpw (%rsi), %ax -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_const(ptr %X) nounwind !prof !14 { -; X64-LABEL: length2_eq_const: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), 
%eax -; X64-NEXT: cmpl $12849, %eax # imm = 0x3231 -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind !prof !14 { -; X64-LABEL: length2_eq_nobuiltin_attr: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $2, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length3(ptr %X, ptr %Y) nounwind !prof !14 { -; X64-LABEL: length3: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %ecx -; X64-NEXT: movzwl (%rsi), %edx -; X64-NEXT: rolw $8, %cx -; X64-NEXT: rolw $8, %dx -; X64-NEXT: cmpw %dx, %cx -; X64-NEXT: jne .LBB4_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 2(%rdi), %eax -; X64-NEXT: movzbl 2(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB4_3: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpw %dx, %cx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind - ret i32 %m -} - -define i1 @length3_eq(ptr %X, ptr %Y) nounwind !prof !14 { -; X64-LABEL: length3_eq: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: xorw (%rsi), %ax -; X64-NEXT: movb 2(%rdi), %cl -; X64-NEXT: xorb 2(%rsi), %cl -; X64-NEXT: movzbl %cl, %ecx -; X64-NEXT: orw %ax, %cx -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length4(ptr %X, ptr %Y) nounwind !prof !14 { -; X64-LABEL: length4: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; 
X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: seta %al -; X64-NEXT: sbbl $0, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - ret i32 %m -} - -define i1 @length4_eq(ptr %X, ptr %Y) nounwind !prof !14 { -; X64-LABEL: length4_eq: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: cmpl (%rsi), %eax -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length4_eq_const(ptr %X) nounwind !prof !14 { -; X64-LABEL: length4_eq_const: -; X64: # %bb.0: -; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231 -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length5(ptr %X, ptr %Y) nounwind !prof !14 { -; X64-LABEL: length5: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: jne .LBB9_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB9_3: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind - ret i32 %m -} - -define i1 @length5_eq(ptr %X, ptr %Y) nounwind !prof !14 { -; X64-LABEL: length5_eq: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: xorl (%rsi), %eax -; X64-NEXT: movb 4(%rdi), %cl -; X64-NEXT: xorb 4(%rsi), %cl -; X64-NEXT: movzbl %cl, %ecx -; X64-NEXT: orl %eax, %ecx -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length8(ptr %X, ptr %Y) nounwind !prof !14 { -; 
X64-LABEL: length8: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: seta %al -; X64-NEXT: sbbl $0, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind - ret i32 %m -} - -define i1 @length8_eq(ptr %X, ptr %Y) nounwind !prof !14 { -; X64-LABEL: length8_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: cmpq (%rsi), %rax -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length8_eq_const(ptr %X) nounwind !prof !14 { -; X64-LABEL: length8_eq_const: -; X64: # %bb.0: -; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130 -; X64-NEXT: cmpq %rax, (%rdi) -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length12_eq(ptr %X, ptr %Y) nounwind !prof !14 { -; X64-LABEL: length12_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: xorl 8(%rsi), %ecx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length12(ptr %X, ptr %Y) nounwind !prof !14 { -; X64-LABEL: length12: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB15_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: movl 8(%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB15_3 -; X64-NEXT: .LBB15_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: 
cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB15_3: # %endblock -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind - ret i32 %m -} - -; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 - -define i32 @length16(ptr %X, ptr %Y) nounwind !prof !14 { -; X64-LABEL: length16: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB16_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB16_3 -; X64-NEXT: .LBB16_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB16_3: # %endblock -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind - ret i32 %m -} - -define i1 @length16_eq(ptr %x, ptr %y) nounwind !prof !14 { -; X64-SSE2-LABEL: length16_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; X64-SSE2-NEXT: pmovmskb %xmm1, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-AVX-LABEL: length16_eq: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: setne %al -; X64-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_eq_const(ptr %X) nounwind !prof !14 { -; X64-SSE2-LABEL: length16_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: 
pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-AVX-LABEL: length16_eq_const: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: sete %al -; X64-AVX-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 - -define i32 @length24(ptr %X, ptr %Y) nounwind !prof !14 { -; X64-LABEL: length24: -; X64: # %bb.0: -; X64-NEXT: movl $24, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind - ret i32 %m -} - -define i1 @length24_eq(ptr %x, ptr %y) nounwind !prof !14 { -; X64-SSE2-LABEL: length24_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; X64-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X64-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X64-SSE2-NEXT: pand %xmm1, %xmm2 -; X64-SSE2-NEXT: pmovmskb %xmm2, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-AVX-LABEL: length24_eq: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: sete %al -; X64-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_eq_const(ptr %X) nounwind !prof !14 { -; X64-SSE2-LABEL: length24_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: 
movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: pand %xmm1, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-AVX-LABEL: length24_eq_const: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: setne %al -; X64-AVX-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length32(ptr %X, ptr %Y) nounwind !prof !14 { -; X64-LABEL: length32: -; X64: # %bb.0: -; X64-NEXT: movl $32, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind - ret i32 %m -} - -; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 - -define i1 @length32_eq(ptr %x, ptr %y) nounwind !prof !14 { -; X64-SSE2-LABEL: length32_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm2, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-AVX1-LABEL: length32_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; 
X64-AVX2-LABEL: length32_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_eq_const(ptr %X) nounwind !prof !14 { -; X64-SSE2-LABEL: length32_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: pand %xmm1, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-AVX1-LABEL: length32_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length32_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length64(ptr %X, ptr %Y) nounwind !prof !14 { -; X64-LABEL: length64: -; X64: # %bb.0: -; X64-NEXT: movl $64, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind - ret i32 %m -} - -define i1 @length64_eq(ptr %x, ptr %y) nounwind !prof !14 { -; X64-SSE2-LABEL: length64_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: pushq %rax -; X64-SSE2-NEXT: movl $64, %edx -; X64-SSE2-NEXT: 
callq memcmp -; X64-SSE2-NEXT: testl %eax, %eax -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: popq %rcx -; X64-SSE2-NEXT: retq -; -; X64-AVX1-LABEL: length64_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1 -; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length64_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_eq_const(ptr %X) nounwind !prof !14 { -; X64-SSE2-LABEL: length64_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: pushq %rax -; X64-SSE2-NEXT: movl $.L.str, %esi -; X64-SSE2-NEXT: movl $64, %edx -; X64-SSE2-NEXT: callq memcmp -; X64-SSE2-NEXT: testl %eax, %eax -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: popq %rcx -; X64-SSE2-NEXT: retq -; -; X64-AVX1-LABEL: length64_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length64_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-AVX2-NEXT: 
vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind !prof !14 { -; X64-LABEL: bcmp_length2: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %ecx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpw (%rsi), %cx -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @bcmp(ptr %X, ptr %Y, i64 2) nounwind - ret i32 %m -} - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"ProfileSummary", !1} -!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} -!2 = !{!"ProfileFormat", !"InstrProf"} -!3 = !{!"TotalCount", i64 10000} -!4 = !{!"MaxCount", i64 10} -!5 = !{!"MaxInternalCount", i64 1} -!6 = !{!"MaxFunctionCount", i64 1000} -!7 = !{!"NumCounts", i64 3} -!8 = !{!"NumFunctions", i64 3} -!9 = !{!"DetailedSummary", !10} -!10 = !{!11, !12, !13} -!11 = !{i32 10000, i64 100, i32 1} -!12 = !{i32 999000, i64 100, i32 1} -!13 = !{i32 999999, i64 1, i32 2} -!14 = !{!"function_entry_count", i64 0} diff --git a/llvm/test/CodeGen/X86/memcmp-x32.ll b/llvm/test/CodeGen/X86/memcmp-x32.ll deleted file mode 100644 index a63402cea2096..0000000000000 --- a/llvm/test/CodeGen/X86/memcmp-x32.ll +++ /dev/null @@ -1,2429 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=X86,X86-SSE1 -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2 -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86,X86-SSE41 - -; This tests 
codegen time inlining/optimization of memcmp -; rdar://6480398 - -@.str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1 - -declare dso_local i32 @memcmp(ptr, ptr, i32) - -define i32 @length0(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length0: -; X86: # %bb.0: -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind - ret i32 %m - } - -define i1 @length0_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length0_eq: -; X86: # %bb.0: -; X86-NEXT: movb $1, %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length0_lt(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length0_lt: -; X86: # %bb.0: -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i32 @length2(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length2: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind - ret i32 %m -} - -define i32 @length2_const(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length2_const: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; 
X86-NEXT: movzwl (%eax), %eax -; X86-NEXT: rolw $8, %ax -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: addl $-12594, %eax # imm = 0xCECE -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind - ret i32 %m -} - -define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length2_gt_const: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl (%eax), %eax -; X86-NEXT: rolw $8, %ax -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: addl $-12594, %eax # imm = 0xCECE -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind - %c = icmp sgt i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length2_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: cmpw (%eax), %cx -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length2_lt(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length2_lt: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i1 @length2_gt(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length2_gt: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl 
(%eax), %eax -; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %ax -; X86-NEXT: movzwl %cx, %ecx -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: subl %eax, %ecx -; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: setg %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind - %c = icmp sgt i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_const(ptr %X) nounwind { -; X86-LABEL: length2_eq_const: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl (%eax), %eax -; X86-NEXT: cmpl $12849, %eax # imm = 0x3231 -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length2_eq_nobuiltin_attr: -; X86: # %bb.0: -; X86-NEXT: pushl $2 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length3(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length3: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: movzwl (%ecx), %esi -; X86-NEXT: rolw $8, %dx -; X86-NEXT: rolw $8, %si -; X86-NEXT: cmpw %si, %dx -; X86-NEXT: jne .LBB11_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 2(%eax), %eax -; X86-NEXT: movzbl 2(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB11_3: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpw %si, %dx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind - ret i32 %m 
-} - -define i1 @length3_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length3_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %edx -; X86-NEXT: xorw (%eax), %dx -; X86-NEXT: movzbl 2(%ecx), %ecx -; X86-NEXT: xorb 2(%eax), %cl -; X86-NEXT: movzbl %cl, %eax -; X86-NEXT: orw %dx, %ax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length4(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length4: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: seta %al -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind - ret i32 %m -} - -define i1 @length4_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length4_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: cmpl (%eax), %ecx -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length4_lt(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length4_lt: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: movl (%eax), %eax -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %eax -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: setb %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i1 @length4_gt(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length4_gt: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl 
{{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: movl (%eax), %eax -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %eax -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: seta %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind - %c = icmp sgt i32 %m, 0 - ret i1 %c -} - -define i1 @length4_eq_const(ptr %X) nounwind { -; X86-LABEL: length4_eq_const: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231 -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length5(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length5: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: movl (%ecx), %esi -; X86-NEXT: bswapl %edx -; X86-NEXT: bswapl %esi -; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB18_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB18_3: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind - ret i32 %m -} - -define i1 @length5_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length5_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: xorl (%eax), %edx -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: xorb 4(%eax), %cl -; X86-NEXT: movzbl %cl, %eax -; X86-NEXT: orl %edx, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind - %c = icmp ne i32 %m, 0 - ret i1 
%c -} - -define i1 @length5_lt(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length5_lt: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: movl (%ecx), %esi -; X86-NEXT: bswapl %edx -; X86-NEXT: bswapl %esi -; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB20_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB20_2 -; X86-NEXT: .LBB20_3: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: .LBB20_2: # %endblock -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i32 @length7(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length7: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB21_2 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 3(%esi), %ecx -; X86-NEXT: movl 3(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB21_3 -; X86-NEXT: .LBB21_2: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: .LBB21_3: # %endblock -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind - ret i32 %m -} - -define i1 @length7_lt(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length7_lt: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; 
X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB22_2 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 3(%esi), %ecx -; X86-NEXT: movl 3(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB22_3 -; X86-NEXT: .LBB22_2: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: .LBB22_3: # %endblock -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i1 @length7_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length7_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: movl 3(%ecx), %ecx -; X86-NEXT: xorl (%eax), %edx -; X86-NEXT: xorl 3(%eax), %ecx -; X86-NEXT: orl %edx, %ecx -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length8(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length8: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB24_2 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB24_3 -; X86-NEXT: .LBB24_2: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, 
%ecx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl $1, %eax -; X86-NEXT: .LBB24_3: # %endblock -; X86-NEXT: popl %esi -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind - ret i32 %m -} - -define i1 @length8_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length8_eq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: movl 4(%ecx), %ecx -; X86-NEXT: xorl (%eax), %edx -; X86-NEXT: xorl 4(%eax), %ecx -; X86-NEXT: orl %edx, %ecx -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length8_eq_const(ptr %X) nounwind { -; X86-LABEL: length8_eq_const: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $858927408, %ecx # imm = 0x33323130 -; X86-NEXT: xorl (%eax), %ecx -; X86-NEXT: movl $926299444, %edx # imm = 0x37363534 -; X86-NEXT: xorl 4(%eax), %edx -; X86-NEXT: orl %ecx, %edx -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length9_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length9_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $9 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length10_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length10_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $10 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 10) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c 
-} - -define i1 @length11_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length11_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $11 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 11) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length12_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length12_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $12 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length12(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length12: -; X86: # %bb.0: -; X86-NEXT: pushl $12 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind - ret i32 %m -} - -define i1 @length13_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length13_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $13 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 13) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length14_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length14_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $14 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 14) nounwind - %c = icmp eq i32 %m, 0 - 
ret i1 %c -} - -define i32 @length15(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length15: -; X86: # %bb.0: -; X86-NEXT: pushl $15 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 15) nounwind - ret i32 %m -} - -define i1 @length15_lt(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length15_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $15 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 15) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i32 @length15_const(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length15_const: -; X86: # %bb.0: -; X86-NEXT: pushl $15 -; X86-NEXT: pushl $.L.str+1 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) nounwind - ret i32 %m -} - -define i1 @length15_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length15_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $15 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 15) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length15_gt_const: -; X86: # %bb.0: -; X86-NEXT: pushl $15 -; X86-NEXT: pushl $.L.str+1 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds 
([513 x i8], ptr @.str, i32 0, i32 1), i32 15) nounwind - %c = icmp sgt i32 %m, 0 - ret i1 %c -} - -; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 - -define i32 @length16(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length16: -; X86: # %bb.0: -; X86-NEXT: pushl $16 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind - ret i32 %m -} - -define i1 @length16_eq(ptr %x, ptr %y) nounwind { -; X86-NOSSE-LABEL: length16_eq: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $16 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length16_eq: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $16 -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: setne %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length16_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu (%eax), %xmm1 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; X86-SSE2-NEXT: pmovmskb %xmm1, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length16_eq: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE41-NEXT: movdqu (%eax), %xmm1 -; X86-SSE41-NEXT: pxor %xmm0, %xmm1 -; X86-SSE41-NEXT: ptest %xmm1, %xmm1 -; X86-SSE41-NEXT: setne %al -; X86-SSE41-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr 
%y, i32 16) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length16_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $16 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length16_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $16 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_eq_const(ptr %X) nounwind { -; X86-NOSSE-LABEL: length16_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $16 -; X86-NOSSE-NEXT: pushl $.L.str -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length16_eq_const: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $16 -; X86-SSE1-NEXT: pushl $.L.str -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: sete %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length16_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl -; -; 
X86-SSE41-LABEL: length16_eq_const: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movdqu (%eax), %xmm0 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: sete %al -; X86-SSE41-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 - -define i32 @length24(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length24: -; X86: # %bb.0: -; X86-NEXT: pushl $24 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind - ret i32 %m -} - -define i1 @length24_eq(ptr %x, ptr %y) nounwind { -; X86-NOSSE-LABEL: length24_eq: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $24 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length24_eq: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $24 -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: sete %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length24_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 8(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 8(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; 
X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length24_eq: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE41-NEXT: movdqu 8(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu (%eax), %xmm2 -; X86-SSE41-NEXT: pxor %xmm0, %xmm2 -; X86-SSE41-NEXT: movdqu 8(%eax), %xmm0 -; X86-SSE41-NEXT: pxor %xmm1, %xmm0 -; X86-SSE41-NEXT: por %xmm2, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: sete %al -; X86-SSE41-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length24_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $24 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length24_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $24 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_eq_const(ptr %X) nounwind { -; X86-NOSSE-LABEL: length24_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $24 -; X86-NOSSE-NEXT: pushl $.L.str -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length24_eq_const: -; X86-SSE1: # %bb.0: -; 
X86-SSE1-NEXT: pushl $24 -; X86-SSE1-NEXT: pushl $.L.str -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: setne %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length24_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: movdqu 8(%eax), %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pand %xmm1, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length24_eq_const: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movdqu (%eax), %xmm0 -; X86-SSE41-NEXT: movdqu 8(%eax), %xmm1 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE41-NEXT: por %xmm1, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: setne %al -; X86-SSE41-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length31(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length31: -; X86: # %bb.0: -; X86-NEXT: pushl $31 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 31) nounwind - ret i32 %m -} - -define i1 @length31_eq(ptr %x, ptr %y) nounwind { -; X86-NOSSE-LABEL: length31_eq: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $31 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; 
X86-SSE1-LABEL: length31_eq: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $31 -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: sete %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length31_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 15(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 15(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length31_eq: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE41-NEXT: movdqu 15(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu (%eax), %xmm2 -; X86-SSE41-NEXT: pxor %xmm0, %xmm2 -; X86-SSE41-NEXT: movdqu 15(%eax), %xmm0 -; X86-SSE41-NEXT: pxor %xmm1, %xmm0 -; X86-SSE41-NEXT: por %xmm2, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: sete %al -; X86-SSE41-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length31_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $31 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_gt(ptr %x, ptr 
%y) nounwind { -; X86-LABEL: length31_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $31 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { -; X86-NOSSE-LABEL: length31_eq_prefer128: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $31 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length31_eq_prefer128: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $31 -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: sete %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length31_eq_prefer128: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 15(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 15(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length31_eq_prefer128: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE41-NEXT: movdqu 15(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu (%eax), %xmm2 
-; X86-SSE41-NEXT: pxor %xmm0, %xmm2 -; X86-SSE41-NEXT: movdqu 15(%eax), %xmm0 -; X86-SSE41-NEXT: pxor %xmm1, %xmm0 -; X86-SSE41-NEXT: por %xmm2, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: sete %al -; X86-SSE41-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_eq_const(ptr %X) nounwind { -; X86-NOSSE-LABEL: length31_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $31 -; X86-NOSSE-NEXT: pushl $.L.str -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length31_eq_const: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $31 -; X86-SSE1-NEXT: pushl $.L.str -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: setne %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length31_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: movdqu 15(%eax), %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pand %xmm1, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length31_eq_const: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movdqu (%eax), %xmm0 -; X86-SSE41-NEXT: movdqu 15(%eax), %xmm1 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE41-NEXT: por %xmm1, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: setne %al -; X86-SSE41-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr 
@.str, i32 31) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length32(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length32: -; X86: # %bb.0: -; X86-NEXT: pushl $32 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind - ret i32 %m -} - -; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 - -define i1 @length32_eq(ptr %x, ptr %y) nounwind { -; X86-NOSSE-LABEL: length32_eq: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $32 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length32_eq: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $32 -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: sete %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length32_eq: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length32_eq: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu (%eax), %xmm2 -; 
X86-SSE41-NEXT: pxor %xmm0, %xmm2 -; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE41-NEXT: pxor %xmm1, %xmm0 -; X86-SSE41-NEXT: por %xmm2, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: sete %al -; X86-SSE41-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length32_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $32 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length32_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $32 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { -; X86-NOSSE-LABEL: length32_eq_prefer128: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $32 -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: sete %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length32_eq_prefer128: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $32 -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: sete %al -; X86-SSE1-NEXT: retl -; -; 
X86-SSE2-LABEL: length32_eq_prefer128: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE2-NEXT: movdqu (%eax), %xmm2 -; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X86-SSE2-NEXT: pand %xmm2, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: sete %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length32_eq_prefer128: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE41-NEXT: movdqu (%ecx), %xmm0 -; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1 -; X86-SSE41-NEXT: movdqu (%eax), %xmm2 -; X86-SSE41-NEXT: pxor %xmm0, %xmm2 -; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0 -; X86-SSE41-NEXT: pxor %xmm1, %xmm0 -; X86-SSE41-NEXT: por %xmm2, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: sete %al -; X86-SSE41-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_eq_const(ptr %X) nounwind { -; X86-NOSSE-LABEL: length32_eq_const: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl $32 -; X86-NOSSE-NEXT: pushl $.L.str -; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: calll memcmp -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: testl %eax, %eax -; X86-NOSSE-NEXT: setne %al -; X86-NOSSE-NEXT: retl -; -; X86-SSE1-LABEL: length32_eq_const: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: pushl $32 -; X86-SSE1-NEXT: pushl $.L.str -; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: calll memcmp -; X86-SSE1-NEXT: addl $12, %esp -; X86-SSE1-NEXT: testl %eax, %eax -; X86-SSE1-NEXT: setne %al -; X86-SSE1-NEXT: retl -; -; X86-SSE2-LABEL: length32_eq_const: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), 
%eax -; X86-SSE2-NEXT: movdqu (%eax), %xmm0 -; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pand %xmm1, %xmm0 -; X86-SSE2-NEXT: pmovmskb %xmm0, %eax -; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-SSE2-NEXT: setne %al -; X86-SSE2-NEXT: retl -; -; X86-SSE41-LABEL: length32_eq_const: -; X86-SSE41: # %bb.0: -; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movdqu (%eax), %xmm0 -; X86-SSE41-NEXT: movdqu 16(%eax), %xmm1 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE41-NEXT: por %xmm1, %xmm0 -; X86-SSE41-NEXT: ptest %xmm0, %xmm0 -; X86-SSE41-NEXT: setne %al -; X86-SSE41-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length48(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length48: -; X86: # %bb.0: -; X86-NEXT: pushl $48 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 48) nounwind - ret i32 %m -} - -define i1 @length48_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length48_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $48 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length48_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $48 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al 
killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length48_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $48 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { -; X86-LABEL: length48_eq_prefer128: -; X86: # %bb.0: -; X86-NEXT: pushl $48 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_eq_const(ptr %X) nounwind { -; X86-LABEL: length48_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $48 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 48) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length63(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length63: -; X86: # %bb.0: -; X86-NEXT: pushl $63 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 63) nounwind - ret i32 %m -} - -define i1 @length63_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length63_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $63 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; 
X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length63_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length63_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $63 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length63_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length63_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $63 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length63_eq_const(ptr %X) nounwind { -; X86-LABEL: length63_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $63 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 63) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length64(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length64: -; X86: # %bb.0: -; X86-NEXT: pushl $64 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind - ret i32 %m -} - -define i1 @length64_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length64_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $64 -; 
X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length64_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $64 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length64_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $64 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_eq_const(ptr %X) nounwind { -; X86-LABEL: length64_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $64 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length96(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length96: -; X86: # %bb.0: -; X86-NEXT: pushl $96 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 96) nounwind - ret i32 %m -} - -define i1 @length96_eq(ptr %x, ptr %y) nounwind { 
-; X86-LABEL: length96_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $96 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length96_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length96_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $96 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length96_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length96_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $96 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length96_eq_const(ptr %X) nounwind { -; X86-LABEL: length96_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $96 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 96) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length127(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length127: -; X86: # %bb.0: -; X86-NEXT: pushl $127 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 127) 
nounwind - ret i32 %m -} - -define i1 @length127_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length127_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $127 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length127_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length127_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $127 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length127_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length127_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $127 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length127_eq_const(ptr %X) nounwind { -; X86-LABEL: length127_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $127 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 127) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length128(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length128: -; X86: # %bb.0: -; X86-NEXT: pushl $128 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; 
X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 128) nounwind - ret i32 %m -} - -define i1 @length128_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length128_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $128 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length128_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length128_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $128 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length128_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length128_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $128 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length128_eq_const(ptr %X) nounwind { -; X86-LABEL: length128_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $128 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 128) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length192(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length192: -; X86: # %bb.0: -; X86-NEXT: pushl $192 -; X86-NEXT: 
pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 192) nounwind - ret i32 %m -} - -define i1 @length192_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length192_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $192 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length192_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length192_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $192 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length192_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length192_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $192 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length192_eq_const(ptr %X) nounwind { -; X86-LABEL: length192_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $192 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 192) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length255(ptr %X, ptr %Y) 
nounwind { -; X86-LABEL: length255: -; X86: # %bb.0: -; X86-NEXT: pushl $255 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 255) nounwind - ret i32 %m -} - -define i1 @length255_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length255_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $255 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length255_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length255_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $255 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length255_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length255_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $255 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length255_eq_const(ptr %X) nounwind { -; X86-LABEL: length255_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $255 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 255) 
nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length256(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length256: -; X86: # %bb.0: -; X86-NEXT: pushl $256 # imm = 0x100 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 256) nounwind - ret i32 %m -} - -define i1 @length256_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length256_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $256 # imm = 0x100 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length256_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length256_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $256 # imm = 0x100 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length256_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length256_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $256 # imm = 0x100 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length256_eq_const(ptr %X) nounwind { -; X86-LABEL: length256_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $256 # imm = 0x100 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll 
memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 256) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length384(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length384: -; X86: # %bb.0: -; X86-NEXT: pushl $384 # imm = 0x180 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 384) nounwind - ret i32 %m -} - -define i1 @length384_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length384_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $384 # imm = 0x180 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length384_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length384_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $384 # imm = 0x180 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length384_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length384_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $384 # imm = 0x180 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length384_eq_const(ptr %X) nounwind { -; 
X86-LABEL: length384_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $384 # imm = 0x180 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 384) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length511(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length511: -; X86: # %bb.0: -; X86-NEXT: pushl $511 # imm = 0x1FF -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 511) nounwind - ret i32 %m -} - -define i1 @length511_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length511_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $511 # imm = 0x1FF -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length511_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length511_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $511 # imm = 0x1FF -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length511_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length511_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $511 # imm = 0x1FF -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = 
tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length511_eq_const(ptr %X) nounwind { -; X86-LABEL: length511_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $511 # imm = 0x1FF -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 511) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length512(ptr %X, ptr %Y) nounwind { -; X86-LABEL: length512: -; X86: # %bb.0: -; X86-NEXT: pushl $512 # imm = 0x200 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 512) nounwind - ret i32 %m -} - -define i1 @length512_eq(ptr %x, ptr %y) nounwind { -; X86-LABEL: length512_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $512 # imm = 0x200 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setne %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length512_lt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length512_lt: -; X86: # %bb.0: -; X86-NEXT: pushl $512 # imm = 0x200 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: shrl $31, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length512_gt(ptr %x, ptr %y) nounwind { -; X86-LABEL: length512_gt: -; X86: # %bb.0: -; X86-NEXT: pushl $512 # imm = 0x200 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; 
X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: setg %al -; X86-NEXT: retl - %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length512_eq_const(ptr %X) nounwind { -; X86-LABEL: length512_eq_const: -; X86: # %bb.0: -; X86-NEXT: pushl $512 # imm = 0x200 -; X86-NEXT: pushl $.L.str -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 512) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; This checks that we do not do stupid things with huge sizes. -define i32 @huge_length(ptr %X, ptr %Y) nounwind { -; X86-LABEL: huge_length: -; X86: # %bb.0: -; X86-NEXT: pushl $-1 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9223372036854775807) nounwind - ret i32 %m -} - -define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind { -; X86-LABEL: huge_length_eq: -; X86: # %bb.0: -; X86-NEXT: pushl $-1 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9223372036854775807) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; This checks non-constant sizes. 
-define i32 @nonconst_length(ptr %X, ptr %Y, i32 %size) nounwind { -; X86-LABEL: nonconst_length: -; X86: # %bb.0: -; X86-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 %size) nounwind - ret i32 %m -} - -define i1 @nonconst_length_eq(ptr %X, ptr %Y, i32 %size) nounwind { -; X86-LABEL: nonconst_length_eq: -; X86: # %bb.0: -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: calll memcmp -; X86-NEXT: addl $12, %esp -; X86-NEXT: testl %eax, %eax -; X86-NEXT: sete %al -; X86-NEXT: retl - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 %size) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll deleted file mode 100644 index 83cb0d6f973be..0000000000000 --- a/llvm/test/CodeGen/X86/memcmp.ll +++ /dev/null @@ -1,3065 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE41 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512BW -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,-prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,-prefer-mask-registers | FileCheck %s 
--check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512F -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,+prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-MIC-AVX,X64-MIC-AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,+prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-MIC-AVX,X64-MIC-AVX512F - -; This tests codegen time inlining/optimization of memcmp -; rdar://6480398 - -@.str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1 - -declare dso_local i32 @memcmp(ptr, ptr, i64) - -define i32 @length0(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length0: -; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind - ret i32 %m - } - -define i1 @length0_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length0_eq: -; X64: # %bb.0: -; X64-NEXT: movb $1, %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length0_lt(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length0_lt: -; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i32 @length2(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length2: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: 
movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - ret i32 %m -} - -define i32 @length2_const(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length2_const: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: rolw $8, %ax -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: addl $-12594, %eax # imm = 0xCECE -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind - ret i32 %m -} - -define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length2_gt_const: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: rolw $8, %ax -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: addl $-12594, %eax # imm = 0xCECE -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind - %c = icmp sgt i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length2_eq: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: cmpw (%rsi), %ax -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length2_lt(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length2_lt: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i1 @length2_gt(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length2_gt: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx -; 
X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind - %c = icmp sgt i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_const(ptr %X) nounwind { -; X64-LABEL: length2_eq_const: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: cmpl $12849, %eax # imm = 0x3231 -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length2_eq_nobuiltin_attr: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $2, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length3(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length3: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %ecx -; X64-NEXT: movzwl (%rsi), %edx -; X64-NEXT: rolw $8, %cx -; X64-NEXT: rolw $8, %dx -; X64-NEXT: cmpw %dx, %cx -; X64-NEXT: jne .LBB11_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 2(%rdi), %eax -; X64-NEXT: movzbl 2(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB11_3: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpw %dx, %cx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind - ret i32 %m -} - -define i1 @length3_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length3_eq: -; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: xorw (%rsi), %ax -; X64-NEXT: movzbl 2(%rdi), %ecx -; X64-NEXT: xorb 2(%rsi), %cl -; X64-NEXT: movzbl %cl, %ecx 
-; X64-NEXT: orw %ax, %cx -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length4(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length4: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: seta %al -; X64-NEXT: sbbl $0, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - ret i32 %m -} - -define i1 @length4_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length4_eq: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: cmpl (%rsi), %eax -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length4_lt(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length4_lt: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx -; X64-NEXT: bswapl %eax -; X64-NEXT: bswapl %ecx -; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: setb %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i1 @length4_gt(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length4_gt: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx -; X64-NEXT: bswapl %eax -; X64-NEXT: bswapl %ecx -; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: seta %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind - %c = icmp sgt i32 %m, 0 - ret i1 %c -} - -define i1 @length4_eq_const(ptr %X) nounwind { -; X64-LABEL: length4_eq_const: -; X64: # %bb.0: -; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231 -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 
@length5(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length5: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: jne .LBB18_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB18_3: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind - ret i32 %m -} - -define i1 @length5_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length5_eq: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: xorl (%rsi), %eax -; X64-NEXT: movzbl 4(%rdi), %ecx -; X64-NEXT: xorb 4(%rsi), %cl -; X64-NEXT: movzbl %cl, %ecx -; X64-NEXT: orl %eax, %ecx -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length5_lt(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length5_lt: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: jne .LBB20_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq -; X64-NEXT: .LBB20_3: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i32 @length7(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length7: -; X64: # %bb.0: -; X64-NEXT: 
movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: jne .LBB21_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 3(%rdi), %ecx -; X64-NEXT: movl 3(%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: je .LBB21_3 -; X64-NEXT: .LBB21_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB21_3: # %endblock -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind - ret i32 %m -} - -define i1 @length7_lt(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length7_lt: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: jne .LBB22_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 3(%rdi), %ecx -; X64-NEXT: movl 3(%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: je .LBB22_3 -; X64-NEXT: .LBB22_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB22_3: # %endblock -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i1 @length7_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length7_eq: -; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl 3(%rdi), %ecx -; X64-NEXT: xorl (%rsi), %eax -; X64-NEXT: xorl 3(%rsi), %ecx -; X64-NEXT: orl %eax, %ecx -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length8(ptr %X, ptr %Y) nounwind { -; X64-LABEL: 
length8: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: seta %al -; X64-NEXT: sbbl $0, %eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind - ret i32 %m -} - -define i1 @length8_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length8_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: cmpq (%rsi), %rax -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length8_eq_const(ptr %X) nounwind { -; X64-LABEL: length8_eq_const: -; X64: # %bb.0: -; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130 -; X64-NEXT: cmpq %rax, (%rdi) -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i1 @length9_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length9_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: movzbl 8(%rdi), %ecx -; X64-NEXT: xorb 8(%rsi), %cl -; X64-NEXT: movzbl %cl, %ecx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length10_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length10_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: movzwl 8(%rdi), %ecx -; X64-NEXT: xorw 8(%rsi), %cx -; X64-NEXT: movzwl %cx, %ecx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length11_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length11_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq 3(%rdi), %rcx -; 
X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: xorq 3(%rsi), %rcx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length12_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length12_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: xorl 8(%rsi), %ecx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: setne %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length12(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length12: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB31_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: movl 8(%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB31_3 -; X64-NEXT: .LBB31_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB31_3: # %endblock -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind - ret i32 %m -} - -define i1 @length13_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length13_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq 5(%rdi), %rcx -; X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: xorq 5(%rsi), %rcx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 13) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length14_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length14_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq 6(%rdi), %rcx -; X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: xorq 
6(%rsi), %rcx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 14) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length15(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length15: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB34_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 7(%rdi), %rcx -; X64-NEXT: movq 7(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB34_3 -; X64-NEXT: .LBB34_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB34_3: # %endblock -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind - ret i32 %m -} - -define i1 @length15_lt(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length15_lt: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB35_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 7(%rdi), %rcx -; X64-NEXT: movq 7(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB35_3 -; X64-NEXT: .LBB35_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB35_3: # %endblock -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind - %c = icmp slt i32 %m, 0 - ret i1 %c -} - -define i32 @length15_const(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length15_const: -; X64: # %bb.0: -; X64-NEXT: movabsq $3544952156018063160, %rcx # imm = 
0x3132333435363738 -; X64-NEXT: movq (%rdi), %rdx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rcx, %rdx -; X64-NEXT: jne .LBB36_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movabsq $4051322327650219061, %rcx # imm = 0x3839303132333435 -; X64-NEXT: movq 7(%rdi), %rdx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rcx, %rdx -; X64-NEXT: je .LBB36_3 -; X64-NEXT: .LBB36_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rcx, %rdx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB36_3: # %endblock -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind - ret i32 %m -} - -define i1 @length15_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length15_eq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq 7(%rdi), %rcx -; X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: xorq 7(%rsi), %rcx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: sete %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length15_gt_const: -; X64: # %bb.0: -; X64-NEXT: movabsq $3544952156018063160, %rax # imm = 0x3132333435363738 -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: bswapq %rcx -; X64-NEXT: cmpq %rax, %rcx -; X64-NEXT: jne .LBB38_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movabsq $4051322327650219061, %rax # imm = 0x3839303132333435 -; X64-NEXT: movq 7(%rdi), %rcx -; X64-NEXT: bswapq %rcx -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpq %rax, %rcx -; X64-NEXT: je .LBB38_3 -; X64-NEXT: .LBB38_2: # %res_block -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpq %rax, %rcx -; X64-NEXT: sbbl %edx, %edx -; X64-NEXT: orl $1, %edx -; X64-NEXT: .LBB38_3: # %endblock -; X64-NEXT: testl %edx, %edx -; X64-NEXT: setg %al -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 
0, i32 1), i64 15) nounwind - %c = icmp sgt i32 %m, 0 - ret i1 %c -} - -; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 - -define i32 @length16(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length16: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB39_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB39_3 -; X64-NEXT: .LBB39_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB39_3: # %endblock -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind - ret i32 %m -} - -define i1 @length16_eq(ptr %x, ptr %y) nounwind { -; X64-SSE2-LABEL: length16_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; X64-SSE2-NEXT: pmovmskb %xmm1, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length16_eq: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu (%rsi), %xmm1 -; X64-SSE41-NEXT: pxor %xmm0, %xmm1 -; X64-SSE41-NEXT: ptest %xmm1, %xmm1 -; X64-SSE41-NEXT: setne %al -; X64-SSE41-NEXT: retq -; -; X64-AVX-LABEL: length16_eq: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: setne %al -; X64-AVX-NEXT: retq -; -; X64-MIC-AVX-LABEL: length16_eq: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm1 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 -; X64-MIC-AVX-NEXT: kortestw %k0, %k0 -; 
X64-MIC-AVX-NEXT: setne %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length16_lt: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB41_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB41_3 -; X64-NEXT: .LBB41_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: orl $1, %eax -; X64-NEXT: .LBB41_3: # %endblock -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length16_gt: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq (%rsi), %rcx -; X64-NEXT: bswapq %rax -; X64-NEXT: bswapq %rcx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB42_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rax -; X64-NEXT: movq 8(%rsi), %rcx -; X64-NEXT: bswapq %rax -; X64-NEXT: bswapq %rcx -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB42_3 -; X64-NEXT: .LBB42_2: # %res_block -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: sbbl %edx, %edx -; X64-NEXT: orl $1, %edx -; X64-NEXT: .LBB42_3: # %endblock -; X64-NEXT: testl %edx, %edx -; X64-NEXT: setg %al -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length16_eq_const(ptr %X) nounwind { 
-; X64-SSE2-LABEL: length16_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length16_eq_const: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: sete %al -; X64-SSE41-NEXT: retq -; -; X64-AVX-LABEL: length16_eq_const: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: sete %al -; X64-AVX-NEXT: retq -; -; X64-MIC-AVX-LABEL: length16_eq_const: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 -; X64-MIC-AVX-NEXT: kortestw %k0, %k0 -; X64-MIC-AVX-NEXT: sete %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 - -define i32 @length24(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length24: -; X64: # %bb.0: -; X64-NEXT: movl $24, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind - ret i32 %m -} - -define i1 @length24_eq(ptr %x, ptr %y) nounwind { -; X64-SSE2-LABEL: length24_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; X64-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X64-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X64-SSE2-NEXT: pand %xmm1, %xmm2 -; 
X64-SSE2-NEXT: pmovmskb %xmm2, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length24_eq: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu (%rsi), %xmm1 -; X64-SSE41-NEXT: pxor %xmm0, %xmm1 -; X64-SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X64-SSE41-NEXT: movq {{.*#+}} xmm2 = mem[0],zero -; X64-SSE41-NEXT: pxor %xmm0, %xmm2 -; X64-SSE41-NEXT: por %xmm1, %xmm2 -; X64-SSE41-NEXT: ptest %xmm2, %xmm2 -; X64-SSE41-NEXT: sete %al -; X64-SSE41-NEXT: retq -; -; X64-AVX-LABEL: length24_eq: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: sete %al -; X64-AVX-NEXT: retq -; -; X64-MIC-AVX-LABEL: length24_eq: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm1 -; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm2, %k0 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: sete %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length24_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $24, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - 
-define i1 @length24_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length24_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $24, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length24_eq_const(ptr %X) nounwind { -; X64-SSE2-LABEL: length24_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: pand %xmm1, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length24_eq_const: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE41-NEXT: por %xmm1, %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: setne %al -; X64-SSE41-NEXT: retq -; -; X64-AVX-LABEL: length24_eq_const: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: setne %al -; X64-AVX-NEXT: retq -; -; X64-MIC-AVX-LABEL: length24_eq_const: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm2 = [959985462,858927408,0,0] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 -; 
X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: setne %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length31(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length31: -; X64: # %bb.0: -; X64-NEXT: movl $31, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 31) nounwind - ret i32 %m -} - -define i1 @length31_eq(ptr %x, ptr %y) nounwind { -; X64-SSE2-LABEL: length31_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X64-SSE2-NEXT: movdqu 15(%rsi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm2, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length31_eq: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1 -; X64-SSE41-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE41-NEXT: pxor %xmm0, %xmm2 -; X64-SSE41-NEXT: movdqu 15(%rsi), %xmm0 -; X64-SSE41-NEXT: pxor %xmm1, %xmm0 -; X64-SSE41-NEXT: por %xmm2, %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: sete %al -; X64-SSE41-NEXT: retq -; -; X64-AVX-LABEL: length31_eq: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 -; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1 -; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: sete %al -; X64-AVX-NEXT: retq -; -; X64-MIC-AVX-LABEL: length31_eq: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), 
%xmm0 -; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 -; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2 -; X64-MIC-AVX-NEXT: vmovdqu 15(%rsi), %xmm3 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: sete %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length31_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $31, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length31_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $31, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { -; X64-SSE2-LABEL: length31_eq_prefer128: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X64-SSE2-NEXT: movdqu 15(%rsi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm2, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length31_eq_prefer128: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1 -; 
X64-SSE41-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE41-NEXT: pxor %xmm0, %xmm2 -; X64-SSE41-NEXT: movdqu 15(%rsi), %xmm0 -; X64-SSE41-NEXT: pxor %xmm1, %xmm0 -; X64-SSE41-NEXT: por %xmm2, %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: sete %al -; X64-SSE41-NEXT: retq -; -; X64-AVX-LABEL: length31_eq_prefer128: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 -; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1 -; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: sete %al -; X64-AVX-NEXT: retq -; -; X64-MIC-AVX-LABEL: length31_eq_prefer128: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 -; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2 -; X64-MIC-AVX-NEXT: vmovdqu 15(%rsi), %xmm3 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: sete %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length31_eq_const(ptr %X) nounwind { -; X64-SSE2-LABEL: length31_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: pand %xmm1, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length31_eq_const: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; 
X64-SSE41-NEXT: por %xmm1, %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: setne %al -; X64-SSE41-NEXT: retq -; -; X64-AVX-LABEL: length31_eq_const: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 -; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: setne %al -; X64-AVX-NEXT: retq -; -; X64-MIC-AVX-LABEL: length31_eq_const: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 -; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [943142453,842084409,909456435,809056311] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 -; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: setne %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 31) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length32(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length32: -; X64: # %bb.0: -; X64-NEXT: movl $32, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind - ret i32 %m -} - -; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 - -define i1 @length32_eq(ptr %x, ptr %y) nounwind { -; X64-SSE2-LABEL: length32_eq: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm2, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: 
retq -; -; X64-SSE41-LABEL: length32_eq: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE41-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE41-NEXT: pxor %xmm0, %xmm2 -; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0 -; X64-SSE41-NEXT: pxor %xmm1, %xmm0 -; X64-SSE41-NEXT: por %xmm2, %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: sete %al -; X64-SSE41-NEXT: retq -; -; X64-AVX1-LABEL: length32_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length32_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512-LABEL: length32_eq: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX512-NEXT: vptest %ymm0, %ymm0 -; X64-AVX512-NEXT: sete %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq -; -; X64-MIC-AVX-LABEL: length32_eq: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm1 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 -; X64-MIC-AVX-NEXT: kortestw %k0, %k0 -; X64-MIC-AVX-NEXT: sete %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length32_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $32, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr 
%y, i64 32) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length32_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $32, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { -; X64-SSE2-LABEL: length32_eq_prefer128: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE2-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0 -; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm2, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: sete %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length32_eq_prefer128: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE41-NEXT: movdqu (%rsi), %xmm2 -; X64-SSE41-NEXT: pxor %xmm0, %xmm2 -; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0 -; X64-SSE41-NEXT: pxor %xmm1, %xmm0 -; X64-SSE41-NEXT: por %xmm2, %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: sete %al -; X64-SSE41-NEXT: retq -; -; X64-AVX-LABEL: length32_eq_prefer128: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-AVX-NEXT: vmovdqu 16(%rdi), %xmm1 -; X64-AVX-NEXT: vpxor 16(%rsi), %xmm1, %xmm1 -; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vptest %xmm0, %xmm0 -; X64-AVX-NEXT: sete %al -; X64-AVX-NEXT: retq -; -; X64-MIC-AVX-LABEL: length32_eq_prefer128: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 -; X64-MIC-AVX-NEXT: vmovdqu 16(%rdi), %xmm1 -; X64-MIC-AVX-NEXT: 
vmovdqu (%rsi), %xmm2 -; X64-MIC-AVX-NEXT: vmovdqu 16(%rsi), %xmm3 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: sete %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length32_eq_const(ptr %X) nounwind { -; X64-SSE2-LABEL: length32_eq_const: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: pand %xmm1, %xmm0 -; X64-SSE2-NEXT: pmovmskb %xmm0, %eax -; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X64-SSE2-NEXT: setne %al -; X64-SSE2-NEXT: retq -; -; X64-SSE41-LABEL: length32_eq_const: -; X64-SSE41: # %bb.0: -; X64-SSE41-NEXT: movdqu (%rdi), %xmm0 -; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE41-NEXT: por %xmm1, %xmm0 -; X64-SSE41-NEXT: ptest %xmm0, %xmm0 -; X64-SSE41-NEXT: setne %al -; X64-SSE41-NEXT: retq -; -; X64-AVX1-LABEL: length32_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length32_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512-LABEL: length32_eq_const: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 -; 
X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX512-NEXT: vptest %ymm0, %ymm0 -; X64-AVX512-NEXT: setne %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq -; -; X64-MIC-AVX-LABEL: length32_eq_const: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 -; X64-MIC-AVX-NEXT: kortestw %k0, %k0 -; X64-MIC-AVX-NEXT: setne %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length48(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length48: -; X64: # %bb.0: -; X64-NEXT: movl $48, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 48) nounwind - ret i32 %m -} - -define i1 @length48_eq(ptr %x, ptr %y) nounwind { -; X64-SSE-LABEL: length48_eq: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $48, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: sete %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length48_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %xmm1 -; X64-AVX1-NEXT: vmovups 32(%rsi), %xmm2 -; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0 -; X64-AVX1-NEXT: vxorps %ymm2, %ymm1, %ymm1 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length48_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %xmm1 -; X64-AVX2-NEXT: vmovdqu 32(%rsi), %xmm2 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512-LABEL: length48_eq: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1 -; X64-AVX512-NEXT: vmovdqu 32(%rsi), %xmm2 -; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX512-NEXT: vptest %ymm0, %ymm0 -; X64-AVX512-NEXT: sete %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq -; -; X64-MIC-AVX-LABEL: length48_eq: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm1 -; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm2 -; X64-MIC-AVX-NEXT: vmovdqu 32(%rsi), %xmm3 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm2, %k0 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: sete %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length48_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $48, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length48_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $48, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind 
"prefer-vector-width"="128" { -; X64-LABEL: length48_eq_prefer128: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $48, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind - %cmp = icmp eq i32 %call, 0 - ret i1 %cmp -} - -define i1 @length48_eq_const(ptr %X) nounwind { -; X64-SSE-LABEL: length48_eq_const: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $.L.str, %esi -; X64-SSE-NEXT: movl $48, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: setne %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length48_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %xmm1 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length48_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %xmm1 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512-LABEL: length48_eq_const: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1 -; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX512-NEXT: vptest %ymm0, %ymm0 -; 
X64-AVX512-NEXT: setne %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq -; -; X64-MIC-AVX-LABEL: length48_eq_const: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm1 -; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [892613426,959985462,858927408,926299444,0,0,0,0] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 -; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: setne %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 48) nounwind - %c = icmp ne i32 %m, 0 - ret i1 %c -} - -define i32 @length63(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length63: -; X64: # %bb.0: -; X64-NEXT: movl $63, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 63) nounwind - ret i32 %m -} - -define i1 @length63_eq(ptr %x, ptr %y) nounwind { -; X64-SSE-LABEL: length63_eq: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $63, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: setne %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length63_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 31(%rdi), %ymm1 -; X64-AVX1-NEXT: vxorps 31(%rsi), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length63_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 31(%rdi), %ymm1 -; X64-AVX2-NEXT: vpxor 31(%rsi), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512-LABEL: length63_eq: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1 -; X64-AVX512-NEXT: vpxor 31(%rsi), %ymm1, %ymm1 -; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX512-NEXT: vptest %ymm0, %ymm0 -; X64-AVX512-NEXT: setne %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq -; -; X64-MIC-AVX-LABEL: length63_eq: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX-NEXT: vmovdqu 31(%rdi), %ymm1 -; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm2 -; X64-MIC-AVX-NEXT: vmovdqu 31(%rsi), %ymm3 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: setne %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length63_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length63_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $63, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length63_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length63_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $63, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length63_eq_const(ptr %X) nounwind { -; X64-SSE-LABEL: length63_eq_const: -; X64-SSE: 
# %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $.L.str, %esi -; X64-SSE-NEXT: movl $63, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: sete %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length63_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 31(%rdi), %ymm1 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length63_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 31(%rdi), %ymm1 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512-LABEL: length63_eq_const: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1 -; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX512-NEXT: vptest %ymm0, %ymm0 -; X64-AVX512-NEXT: sete %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq -; -; X64-MIC-AVX-LABEL: length63_eq_const: -; X64-MIC-AVX: # %bb.0: -; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX-NEXT: vmovdqu 31(%rdi), %ymm1 -; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [875770417,943142453,842084409,909456435,809056311,875770417,943142453,842084409] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 -; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = 
[858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960] -; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; X64-MIC-AVX-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX-NEXT: sete %al -; X64-MIC-AVX-NEXT: vzeroupper -; X64-MIC-AVX-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 63) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length64(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length64: -; X64: # %bb.0: -; X64-NEXT: movl $64, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind - ret i32 %m -} - -define i1 @length64_eq(ptr %x, ptr %y) nounwind { -; X64-SSE-LABEL: length64_eq: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $64, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: setne %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length64_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1 -; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length64_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512-LABEL: length64_eq: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512-NEXT: vpcmpneqd (%rsi), %zmm0, %k0 -; X64-AVX512-NEXT: kortestw %k0, %k0 -; X64-AVX512-NEXT: setne %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq -; -; X64-MIC-AVX2-LABEL: 
length64_eq: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-MIC-AVX2-NEXT: vmovdqu (%rsi), %ymm2 -; X64-MIC-AVX2-NEXT: vmovdqu 32(%rsi), %ymm3 -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 -; X64-MIC-AVX2-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX2-NEXT: setne %al -; X64-MIC-AVX2-NEXT: vzeroupper -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length64_eq: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k0 -; X64-MIC-AVX512F-NEXT: kortestw %k0, %k0 -; X64-MIC-AVX512F-NEXT: setne %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length64_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $64, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length64_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $64, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length64_eq_const(ptr %X) nounwind { -; X64-SSE-LABEL: length64_eq_const: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $.L.str, %esi -; X64-SSE-NEXT: movl $64, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: sete %al -; X64-SSE-NEXT: popq %rcx -; 
X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length64_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovups (%rdi), %ymm0 -; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vptest %ymm0, %ymm0 -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length64_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vptest %ymm0, %ymm0 -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq -; -; X64-AVX512-LABEL: length64_eq_const: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0 -; X64-AVX512-NEXT: kortestw %k0, %k0 -; X64-AVX512-NEXT: sete %al -; X64-AVX512-NEXT: vzeroupper -; X64-AVX512-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length64_eq_const: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: vmovdqu (%rdi), %ymm0 -; X64-MIC-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1 -; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [892613426,959985462,858927408,926299444,825243960,892613426,959985462,858927408] -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 -; X64-MIC-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960] -; X64-MIC-AVX2-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; X64-MIC-AVX2-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX2-NEXT: sete %al -; X64-MIC-AVX2-NEXT: vzeroupper -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length64_eq_const: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: 
vpcmpneqd .L.str(%rip), %zmm0, %k0 -; X64-MIC-AVX512F-NEXT: kortestw %k0, %k0 -; X64-MIC-AVX512F-NEXT: sete %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length96(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length96: -; X64: # %bb.0: -; X64-NEXT: movl $96, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 96) nounwind - ret i32 %m -} - -define i1 @length96_eq(ptr %x, ptr %y) nounwind { -; X64-SSE-LABEL: length96_eq: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $96, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: setne %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length96_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: pushq %rax -; X64-AVX1-NEXT: movl $96, %edx -; X64-AVX1-NEXT: callq memcmp -; X64-AVX1-NEXT: testl %eax, %eax -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: popq %rcx -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length96_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: pushq %rax -; X64-AVX2-NEXT: movl $96, %edx -; X64-AVX2-NEXT: callq memcmp -; X64-AVX2-NEXT: testl %eax, %eax -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: popq %rcx -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length96_eq: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vmovdqu 64(%rdi), %ymm1 -; X64-AVX512BW-NEXT: vmovdqu 64(%rsi), %ymm2 -; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb %zmm2, %zmm1, %k1 -; X64-AVX512BW-NEXT: kortestq %k1, %k0 -; X64-AVX512BW-NEXT: setne %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: length96_eq: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1 -; X64-AVX512F-NEXT: vmovdqu 64(%rsi), %ymm2 -; 
X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k0 -; X64-AVX512F-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 -; X64-AVX512F-NEXT: kortestw %k1, %k0 -; X64-AVX512F-NEXT: setne %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length96_eq: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: pushq %rax -; X64-MIC-AVX2-NEXT: movl $96, %edx -; X64-MIC-AVX2-NEXT: callq memcmp -; X64-MIC-AVX2-NEXT: testl %eax, %eax -; X64-MIC-AVX2-NEXT: setne %al -; X64-MIC-AVX2-NEXT: popq %rcx -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length96_eq: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1 -; X64-MIC-AVX512F-NEXT: vmovdqu 64(%rsi), %ymm2 -; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k1, %k0 -; X64-MIC-AVX512F-NEXT: setne %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length96_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length96_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $96, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length96_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length96_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $96, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length96_eq_const(ptr %X) nounwind { -; X64-SSE-LABEL: length96_eq_const: -; X64-SSE: # 
%bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $.L.str, %esi -; X64-SSE-NEXT: movl $96, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: sete %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length96_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: pushq %rax -; X64-AVX1-NEXT: movl $.L.str, %esi -; X64-AVX1-NEXT: movl $96, %edx -; X64-AVX1-NEXT: callq memcmp -; X64-AVX1-NEXT: testl %eax, %eax -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: popq %rcx -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length96_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: pushq %rax -; X64-AVX2-NEXT: movl $.L.str, %esi -; X64-AVX2-NEXT: movl $96, %edx -; X64-AVX2-NEXT: callq memcmp -; X64-AVX2-NEXT: testl %eax, %eax -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: popq %rcx -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length96_eq_const: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vmovdqu 64(%rdi), %ymm1 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %k1 -; X64-AVX512BW-NEXT: kortestq %k1, %k0 -; X64-AVX512BW-NEXT: sete %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: length96_eq_const: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1 -; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0 -; X64-AVX512F-NEXT: vpcmpneqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %k1 -; X64-AVX512F-NEXT: kortestw %k1, %k0 -; X64-AVX512F-NEXT: sete %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length96_eq_const: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: pushq %rax -; X64-MIC-AVX2-NEXT: movl $.L.str, %esi -; X64-MIC-AVX2-NEXT: movl $96, %edx -; X64-MIC-AVX2-NEXT: callq memcmp -; X64-MIC-AVX2-NEXT: testl %eax, %eax -; X64-MIC-AVX2-NEXT: sete %al -; 
X64-MIC-AVX2-NEXT: popq %rcx -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length96_eq_const: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu 64(%rdi), %ymm1 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k1, %k0 -; X64-MIC-AVX512F-NEXT: sete %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 96) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length127(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length127: -; X64: # %bb.0: -; X64-NEXT: movl $127, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 127) nounwind - ret i32 %m -} - -define i1 @length127_eq(ptr %x, ptr %y) nounwind { -; X64-SSE-LABEL: length127_eq: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $127, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: setne %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length127_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: pushq %rax -; X64-AVX1-NEXT: movl $127, %edx -; X64-AVX1-NEXT: callq memcmp -; X64-AVX1-NEXT: testl %eax, %eax -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: popq %rcx -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length127_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: pushq %rax -; X64-AVX2-NEXT: movl $127, %edx -; X64-AVX2-NEXT: callq memcmp -; X64-AVX2-NEXT: testl %eax, %eax -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: popq %rcx -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length127_eq: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vmovdqu64 63(%rdi), %zmm1 -; X64-AVX512BW-NEXT: vpcmpneqb 63(%rsi), %zmm1, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k1 -; X64-AVX512BW-NEXT: kortestq %k0, %k1 
-; X64-AVX512BW-NEXT: setne %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: length127_eq: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1 -; X64-AVX512F-NEXT: vpcmpneqd 63(%rsi), %zmm1, %k0 -; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1 -; X64-AVX512F-NEXT: kortestw %k0, %k1 -; X64-AVX512F-NEXT: setne %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length127_eq: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: pushq %rax -; X64-MIC-AVX2-NEXT: movl $127, %edx -; X64-MIC-AVX2-NEXT: callq memcmp -; X64-MIC-AVX2-NEXT: testl %eax, %eax -; X64-MIC-AVX2-NEXT: setne %al -; X64-MIC-AVX2-NEXT: popq %rcx -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length127_eq: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1 -; X64-MIC-AVX512F-NEXT: vpcmpneqd 63(%rsi), %zmm1, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX512F-NEXT: setne %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length127_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length127_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $127, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length127_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length127_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $127, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: 
retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length127_eq_const(ptr %X) nounwind { -; X64-SSE-LABEL: length127_eq_const: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $.L.str, %esi -; X64-SSE-NEXT: movl $127, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: sete %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length127_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: pushq %rax -; X64-AVX1-NEXT: movl $.L.str, %esi -; X64-AVX1-NEXT: movl $127, %edx -; X64-AVX1-NEXT: callq memcmp -; X64-AVX1-NEXT: testl %eax, %eax -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: popq %rcx -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length127_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: pushq %rax -; X64-AVX2-NEXT: movl $.L.str, %esi -; X64-AVX2-NEXT: movl $127, %edx -; X64-AVX2-NEXT: callq memcmp -; X64-AVX2-NEXT: testl %eax, %eax -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: popq %rcx -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length127_eq_const: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vmovdqu64 63(%rdi), %zmm1 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str+63(%rip), %zmm1, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k1 -; X64-AVX512BW-NEXT: kortestq %k0, %k1 -; X64-AVX512BW-NEXT: sete %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: length127_eq_const: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1 -; X64-AVX512F-NEXT: vpcmpneqd .L.str+63(%rip), %zmm1, %k0 -; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1 -; X64-AVX512F-NEXT: kortestw %k0, %k1 -; X64-AVX512F-NEXT: sete %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length127_eq_const: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: pushq 
%rax -; X64-MIC-AVX2-NEXT: movl $.L.str, %esi -; X64-MIC-AVX2-NEXT: movl $127, %edx -; X64-MIC-AVX2-NEXT: callq memcmp -; X64-MIC-AVX2-NEXT: testl %eax, %eax -; X64-MIC-AVX2-NEXT: sete %al -; X64-MIC-AVX2-NEXT: popq %rcx -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length127_eq_const: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+63(%rip), %zmm1, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX512F-NEXT: sete %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 127) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length128(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length128: -; X64: # %bb.0: -; X64-NEXT: movl $128, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 128) nounwind - ret i32 %m -} - -define i1 @length128_eq(ptr %x, ptr %y) nounwind { -; X64-SSE-LABEL: length128_eq: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $128, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: setne %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length128_eq: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: pushq %rax -; X64-AVX1-NEXT: movl $128, %edx -; X64-AVX1-NEXT: callq memcmp -; X64-AVX1-NEXT: testl %eax, %eax -; X64-AVX1-NEXT: setne %al -; X64-AVX1-NEXT: popq %rcx -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length128_eq: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: pushq %rax -; X64-AVX2-NEXT: movl $128, %edx -; X64-AVX2-NEXT: callq memcmp -; X64-AVX2-NEXT: testl %eax, %eax -; X64-AVX2-NEXT: setne %al -; X64-AVX2-NEXT: popq %rcx -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length128_eq: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; 
X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512BW-NEXT: vpcmpneqb 64(%rsi), %zmm1, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb (%rsi), %zmm0, %k1 -; X64-AVX512BW-NEXT: kortestq %k0, %k1 -; X64-AVX512BW-NEXT: setne %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: length128_eq: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k0 -; X64-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1 -; X64-AVX512F-NEXT: kortestw %k0, %k1 -; X64-AVX512F-NEXT: setne %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length128_eq: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: pushq %rax -; X64-MIC-AVX2-NEXT: movl $128, %edx -; X64-MIC-AVX2-NEXT: callq memcmp -; X64-MIC-AVX2-NEXT: testl %eax, %eax -; X64-MIC-AVX2-NEXT: setne %al -; X64-MIC-AVX2-NEXT: popq %rcx -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length128_eq: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-MIC-AVX512F-NEXT: vpcmpneqd 64(%rsi), %zmm1, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd (%rsi), %zmm0, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX512F-NEXT: setne %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length128_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length128_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $128, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length128_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: 
length128_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $128, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length128_eq_const(ptr %X) nounwind { -; X64-SSE-LABEL: length128_eq_const: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: movl $.L.str, %esi -; X64-SSE-NEXT: movl $128, %edx -; X64-SSE-NEXT: callq memcmp -; X64-SSE-NEXT: testl %eax, %eax -; X64-SSE-NEXT: sete %al -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: length128_eq_const: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: pushq %rax -; X64-AVX1-NEXT: movl $.L.str, %esi -; X64-AVX1-NEXT: movl $128, %edx -; X64-AVX1-NEXT: callq memcmp -; X64-AVX1-NEXT: testl %eax, %eax -; X64-AVX1-NEXT: sete %al -; X64-AVX1-NEXT: popq %rcx -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: length128_eq_const: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: pushq %rax -; X64-AVX2-NEXT: movl $.L.str, %esi -; X64-AVX2-NEXT: movl $128, %edx -; X64-AVX2-NEXT: callq memcmp -; X64-AVX2-NEXT: testl %eax, %eax -; X64-AVX2-NEXT: sete %al -; X64-AVX2-NEXT: popq %rcx -; X64-AVX2-NEXT: retq -; -; X64-AVX512BW-LABEL: length128_eq_const: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str+64(%rip), %zmm1, %k0 -; X64-AVX512BW-NEXT: vpcmpneqb .L.str(%rip), %zmm0, %k1 -; X64-AVX512BW-NEXT: kortestq %k0, %k1 -; X64-AVX512BW-NEXT: sete %al -; X64-AVX512BW-NEXT: vzeroupper -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512F-LABEL: length128_eq_const: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k0 -; X64-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1 -; X64-AVX512F-NEXT: 
kortestw %k0, %k1 -; X64-AVX512F-NEXT: sete %al -; X64-AVX512F-NEXT: vzeroupper -; X64-AVX512F-NEXT: retq -; -; X64-MIC-AVX2-LABEL: length128_eq_const: -; X64-MIC-AVX2: # %bb.0: -; X64-MIC-AVX2-NEXT: pushq %rax -; X64-MIC-AVX2-NEXT: movl $.L.str, %esi -; X64-MIC-AVX2-NEXT: movl $128, %edx -; X64-MIC-AVX2-NEXT: callq memcmp -; X64-MIC-AVX2-NEXT: testl %eax, %eax -; X64-MIC-AVX2-NEXT: sete %al -; X64-MIC-AVX2-NEXT: popq %rcx -; X64-MIC-AVX2-NEXT: retq -; -; X64-MIC-AVX512F-LABEL: length128_eq_const: -; X64-MIC-AVX512F: # %bb.0: -; X64-MIC-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 -; X64-MIC-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str+64(%rip), %zmm1, %k0 -; X64-MIC-AVX512F-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k1 -; X64-MIC-AVX512F-NEXT: kortestw %k0, %k1 -; X64-MIC-AVX512F-NEXT: sete %al -; X64-MIC-AVX512F-NEXT: vzeroupper -; X64-MIC-AVX512F-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 128) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length192(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length192: -; X64: # %bb.0: -; X64-NEXT: movl $192, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 192) nounwind - ret i32 %m -} - -define i1 @length192_eq(ptr %x, ptr %y) nounwind { -; X64-LABEL: length192_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $192, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setne %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length192_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length192_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $192, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind - %cmp = 
icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length192_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length192_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $192, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length192_eq_const(ptr %X) nounwind { -; X64-LABEL: length192_eq_const: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $.L.str, %esi -; X64-NEXT: movl $192, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 192) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length255(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length255: -; X64: # %bb.0: -; X64-NEXT: movl $255, %edx -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 255) nounwind - ret i32 %m -} - -define i1 @length255_eq(ptr %x, ptr %y) nounwind { -; X64-LABEL: length255_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $255, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setne %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length255_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length255_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $255, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length255_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length255_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; 
X64-NEXT: movl $255, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length255_eq_const(ptr %X) nounwind { -; X64-LABEL: length255_eq_const: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $.L.str, %esi -; X64-NEXT: movl $255, %edx -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 255) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length256(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length256: -; X64: # %bb.0: -; X64-NEXT: movl $256, %edx # imm = 0x100 -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 256) nounwind - ret i32 %m -} - -define i1 @length256_eq(ptr %x, ptr %y) nounwind { -; X64-LABEL: length256_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $256, %edx # imm = 0x100 -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setne %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length256_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length256_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $256, %edx # imm = 0x100 -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length256_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length256_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $256, %edx # imm = 0x100 -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; 
X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length256_eq_const(ptr %X) nounwind { -; X64-LABEL: length256_eq_const: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $.L.str, %esi -; X64-NEXT: movl $256, %edx # imm = 0x100 -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 256) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length384(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length384: -; X64: # %bb.0: -; X64-NEXT: movl $384, %edx # imm = 0x180 -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 384) nounwind - ret i32 %m -} - -define i1 @length384_eq(ptr %x, ptr %y) nounwind { -; X64-LABEL: length384_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $384, %edx # imm = 0x180 -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setne %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length384_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length384_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $384, %edx # imm = 0x180 -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length384_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length384_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $384, %edx # imm = 0x180 -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) 
nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length384_eq_const(ptr %X) nounwind { -; X64-LABEL: length384_eq_const: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $.L.str, %esi -; X64-NEXT: movl $384, %edx # imm = 0x180 -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 384) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length511(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length511: -; X64: # %bb.0: -; X64-NEXT: movl $511, %edx # imm = 0x1FF -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 511) nounwind - ret i32 %m -} - -define i1 @length511_eq(ptr %x, ptr %y) nounwind { -; X64-LABEL: length511_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $511, %edx # imm = 0x1FF -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setne %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length511_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length511_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $511, %edx # imm = 0x1FF -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length511_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length511_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $511, %edx # imm = 0x1FF -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length511_eq_const(ptr 
%X) nounwind { -; X64-LABEL: length511_eq_const: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $.L.str, %esi -; X64-NEXT: movl $511, %edx # imm = 0x1FF -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 511) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -define i32 @length512(ptr %X, ptr %Y) nounwind { -; X64-LABEL: length512: -; X64: # %bb.0: -; X64-NEXT: movl $512, %edx # imm = 0x200 -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 512) nounwind - ret i32 %m -} - -define i1 @length512_eq(ptr %x, ptr %y) nounwind { -; X64-LABEL: length512_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $512, %edx # imm = 0x200 -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setne %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind - %cmp = icmp ne i32 %call, 0 - ret i1 %cmp -} - -define i1 @length512_lt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length512_lt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $512, %edx # imm = 0x200 -; X64-NEXT: callq memcmp -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind - %cmp = icmp slt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length512_gt(ptr %x, ptr %y) nounwind { -; X64-LABEL: length512_gt: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movl $512, %edx # imm = 0x200 -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setg %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind - %cmp = icmp sgt i32 %call, 0 - ret i1 %cmp -} - -define i1 @length512_eq_const(ptr %X) nounwind { -; X64-LABEL: length512_eq_const: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; 
X64-NEXT: movl $.L.str, %esi -; X64-NEXT: movl $512, %edx # imm = 0x200 -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 512) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; This checks that we do not do stupid things with huge sizes. -define i32 @huge_length(ptr %X, ptr %Y) nounwind { -; X64-LABEL: huge_length: -; X64: # %bb.0: -; X64-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind - ret i32 %m -} - -define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind { -; X64-LABEL: huge_length_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} - -; This checks non-constant sizes. 
-define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) nounwind { -; X64-LABEL: nonconst_length: -; X64: # %bb.0: -; X64-NEXT: jmp memcmp # TAILCALL - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind - ret i32 %m -} - -define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) nounwind { -; X64-LABEL: nonconst_length_eq: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: callq memcmp -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind - %c = icmp eq i32 %m, 0 - ret i1 %c -} diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index 6f2bba84a6ecc..fae0686b62662 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -19,8 +19,8 @@ ; CHECK-NEXT: Type-Based Alias Analysis ; CHECK-NEXT: Scoped NoAlias Alias Analysis ; CHECK-NEXT: Assumption Cache Tracker -; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Create Garbage Collector Module Metadata +; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default Regalloc Eviction Advisor ; CHECK-NEXT: Default Regalloc Priority Advisor @@ -42,13 +42,6 @@ ; CHECK-NEXT: Canonicalize Freeze Instructions in Loops ; CHECK-NEXT: Induction Variable Users ; CHECK-NEXT: Loop Strength Reduction -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Merge contiguous icmps into a memcmp -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Expand memcmp() to load/stores ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering ; CHECK-NEXT: Lower constant intrinsics diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index 51fb93daa4dfa..c430d4f0386b6 100644 --- 
a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -274,6 +274,8 @@ ; CHECK-O-NEXT: Running pass: LoopSinkPass ; CHECK-O-NEXT: Running pass: InstSimplifyPass ; CHECK-O-NEXT: Running pass: DivRemPairsPass +; CHECK-O-NEXT: Running pass: MergeICmpsPass +; CHECK-O-NEXT: Running pass: ExpandMemCmpPass ; CHECK-O-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-EP-OPTIMIZER-LAST: Running pass: NoOpModulePass diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll index d451d2897f673..1691e186fb862 100644 --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -141,6 +141,8 @@ ; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O23SZ-NEXT: Running pass: LoopSink ; CHECK-O23SZ-NEXT: Running pass: DivRemPairs +; CHECK-O23SZ-NEXT: Running pass: MergeICmpsPass +; CHECK-O23SZ-NEXT: Running pass: ExpandMemCmpPass ; CHECK-O23SZ-NEXT: Running pass: SimplifyCFGPass ; CHECK-O23SZ-NEXT: Running pass: EliminateAvailableExternallyPass ; CHECK-O23SZ-NEXT: Running pass: GlobalDCEPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll index 064362eabbf83..be4bd7dd22684 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll @@ -197,6 +197,8 @@ ; CHECK-POSTLINK-O-NEXT: Running pass: LoopSinkPass ; CHECK-POSTLINK-O-NEXT: Running pass: InstSimplifyPass ; CHECK-POSTLINK-O-NEXT: Running pass: DivRemPairsPass +; CHECK-POSTLINK-O-NEXT: Running pass: MergeICmpsPass +; CHECK-POSTLINK-O-NEXT: Running pass: ExpandMemCmpPass ; CHECK-POSTLINK-O-NEXT: Running pass: TailCallElimPass ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-POST-EP-OPT-LAST-NEXT: Running pass: NoOpModulePass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll 
index 19a44867e434a..a77013809ccf0 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -182,6 +182,8 @@ ; CHECK-O-NEXT: Running pass: LoopSinkPass ; CHECK-O-NEXT: Running pass: InstSimplifyPass ; CHECK-O-NEXT: Running pass: DivRemPairsPass +; CHECK-O-NEXT: Running pass: MergeICmpsPass +; CHECK-O-NEXT: Running pass: ExpandMemCmpPass ; CHECK-O-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: GlobalDCEPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index ac80a31d8fd4b..b2c6464108d4d 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -190,6 +190,8 @@ ; CHECK-O-NEXT: Running pass: LoopSinkPass ; CHECK-O-NEXT: Running pass: InstSimplifyPass ; CHECK-O-NEXT: Running pass: DivRemPairsPass +; CHECK-O-NEXT: Running pass: MergeICmpsPass +; CHECK-O-NEXT: Running pass: ExpandMemCmpPass ; CHECK-O-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: GlobalDCEPass diff --git a/llvm/test/Transforms/ExpandMemCmp/AArch64/bcmp.ll b/llvm/test/Transforms/ExpandMemCmp/AArch64/bcmp.ll new file mode 100644 index 0000000000000..18141e72007f7 --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/AArch64/bcmp.ll @@ -0,0 +1,751 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=expand-memcmp < %s -mtriple=aarch64-unknown-unknown | FileCheck %s + +declare i32 @bcmp(ptr, ptr, i64) + +define i1 @bcmp0(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp0( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 0) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp1(ptr 
%a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp1( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i8 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 1) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp2(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp2( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 2) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +; or (and (xor a, b), C1), (and (xor c, d), C2) +define i1 @bcmp3(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp3( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 2 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 2 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; CHECK-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP13]], 0 
+; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 3) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp4(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp4( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 4) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +; or (xor a, b), (and (xor c, d), C2) +define i1 @bcmp5(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp5( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP13]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 5) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +; or (xor a, b), (and (xor c, d), C2) +define i1 @bcmp6(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp6( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], 
align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP13]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 6) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +; or (xor a, b), (xor c, d) +define i1 @bcmp7(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp7( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 3 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 7) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp8(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp8( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1 +; CHECK-NEXT: 
[[TMP2:%.*]] = load i64, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 8) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +; or (xor a, b), (and (xor c, d), C2) +define i1 @bcmp9(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp9( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP13]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 9) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp10(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp10( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = 
zext i16 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP13]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 10) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp11(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp11( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 3 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 11) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp12(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp12( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; CHECK-NEXT: 
[[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP13]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 12) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp13(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp13( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 5 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 5 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 13) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp14(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp14( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 6 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 6 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; 
CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 14) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp15(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp15( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 7 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 7 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 15) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp16(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp16( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] 
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 16) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp20(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp20( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[A]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[B]], i64 16 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP12]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i64 [[TMP17]], 0 +; CHECK-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP19]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 20) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp24(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp24( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 
[[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[A]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[B]], i64 16 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP17]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 24) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp28(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp28( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[A]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[B]], i64 16 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]] +; CHECK-NEXT: 
[[TMP14:%.*]] = getelementptr i8, ptr [[A]], i64 24 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[B]], i64 24 +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = zext i32 [[TMP16]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP17]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = xor i64 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP13]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = icmp ne i64 [[TMP23]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = zext i1 [[TMP24]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP25]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 28) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp33(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp33( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[A]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[B]], i64 16 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[A]], i64 24 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[B]], i64 24 +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], 
align 1 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[A]], i64 32 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 32 +; CHECK-NEXT: [[TMP21:%.*]] = load i8, ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = load i8, ptr [[TMP20]], align 1 +; CHECK-NEXT: [[TMP23:%.*]] = zext i8 [[TMP21]] to i64 +; CHECK-NEXT: [[TMP24:%.*]] = zext i8 [[TMP22]] to i64 +; CHECK-NEXT: [[TMP25:%.*]] = xor i64 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP13]], [[TMP18]] +; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP28]], [[TMP25]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp ne i64 [[TMP29]], 0 +; CHECK-NEXT: [[TMP31:%.*]] = zext i1 [[TMP30]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP31]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 33) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp38(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp38( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[A]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[B]], i64 16 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], 
[[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[A]], i64 24 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[B]], i64 24 +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[A]], i64 30 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 30 +; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 1 +; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP13]], [[TMP18]] +; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP26]], [[TMP23]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP27]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = zext i1 [[TMP28]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP29]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 38) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp45(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp45( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[A]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[B]], i64 16 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: 
[[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[A]], i64 24 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[B]], i64 24 +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[A]], i64 32 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 32 +; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 1 +; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[A]], i64 37 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 37 +; CHECK-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP24]], align 1 +; CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 1 +; CHECK-NEXT: [[TMP28:%.*]] = xor i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP13]], [[TMP18]] +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP23]], [[TMP28]] +; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP32]], [[TMP31]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i64 [[TMP33]], 0 +; CHECK-NEXT: [[TMP35:%.*]] = zext i1 [[TMP34]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP35]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 45) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +; Although the large cmp chain may be not profitable on high end CPU, we +; believe it is better on most cpus, so perform the transform now. 
+; 8 xor + 7 or + 1 cmp only need 6 cycles on a 4 width ALU port machine +; 2 cycle for xor +; 3 cycle for or +; 1 cycle for cmp +define i1 @bcmp64(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp64( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[A]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[B]], i64 16 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[A]], i64 24 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[B]], i64 24 +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[A]], i64 32 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 32 +; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 1 +; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[A]], i64 40 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 40 +; CHECK-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP24]], align 1 +; CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 1 +; CHECK-NEXT: [[TMP28:%.*]] = xor i64 
[[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[A]], i64 48 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 48 +; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP33:%.*]] = xor i64 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[A]], i64 56 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[B]], i64 56 +; CHECK-NEXT: [[TMP36:%.*]] = load i64, ptr [[TMP34]], align 1 +; CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[TMP35]], align 1 +; CHECK-NEXT: [[TMP38:%.*]] = xor i64 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP13]], [[TMP18]] +; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP23]], [[TMP28]] +; CHECK-NEXT: [[TMP42:%.*]] = or i64 [[TMP33]], [[TMP38]] +; CHECK-NEXT: [[TMP43:%.*]] = or i64 [[TMP39]], [[TMP40]] +; CHECK-NEXT: [[TMP44:%.*]] = or i64 [[TMP41]], [[TMP42]] +; CHECK-NEXT: [[TMP45:%.*]] = or i64 [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = icmp ne i64 [[TMP45]], 0 +; CHECK-NEXT: [[TMP47:%.*]] = zext i1 [[TMP46]] to i32 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP47]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 64) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp89(ptr %a, ptr %b) { +; CHECK-LABEL: define i1 @bcmp89( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[CR:%.*]] = call i32 @bcmp(ptr [[A]], ptr [[B]], i64 89) +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[CR]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %cr = call i32 @bcmp(ptr %a, ptr %b, i64 89) + %r = icmp eq i32 %cr, 0 + ret i1 %r +} + +define i1 @bcmp_zext(i32 %0, i32 %1, i8 %2, i8 %3) { +; CHECK-LABEL: define i1 @bcmp_zext( +; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i8 [[TMP2:%.*]], i8 [[TMP3:%.*]]) { +; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = xor i8 
[[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +; CHECK-NEXT: ret i1 [[TMP9]] +; + %5 = xor i32 %1, %0 + %6 = xor i8 %3, %2 + %7 = zext i8 %6 to i32 + %8 = or i32 %5, %7 + %9 = icmp eq i32 %8, 0 + ret i1 %9 +} + +define i1 @bcmp_i8(i8 %a0, i8 %b0, i8 %a1, i8 %b1, i8 %a2, i8 %b2) { +; CHECK-LABEL: define i1 @bcmp_i8( +; CHECK-SAME: i8 [[A0:%.*]], i8 [[B0:%.*]], i8 [[A1:%.*]], i8 [[B1:%.*]], i8 [[A2:%.*]], i8 [[B2:%.*]]) { +; CHECK-NEXT: [[XOR0:%.*]] = xor i8 [[B0]], [[A0]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B1]], [[A1]] +; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[B2]], [[A2]] +; CHECK-NEXT: [[OR0:%.*]] = or i8 [[XOR0]], [[XOR1]] +; CHECK-NEXT: [[OR1:%.*]] = or i8 [[OR0]], [[XOR2]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[OR1]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %xor0 = xor i8 %b0, %a0 + %xor1 = xor i8 %b1, %a1 + %xor2 = xor i8 %b2, %a2 + %or0 = or i8 %xor0, %xor1 + %or1 = or i8 %or0, %xor2 + %r = icmp eq i8 %or1, 0 + ret i1 %r +} + +define i1 @bcmp_i16(i16 %a0, i16 %b0, i16 %a1, i16 %b1, i16 %a2, i16 %b2) { +; CHECK-LABEL: define i1 @bcmp_i16( +; CHECK-SAME: i16 [[A0:%.*]], i16 [[B0:%.*]], i16 [[A1:%.*]], i16 [[B1:%.*]], i16 [[A2:%.*]], i16 [[B2:%.*]]) { +; CHECK-NEXT: [[XOR0:%.*]] = xor i16 [[B0]], [[A0]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i16 [[B1]], [[A1]] +; CHECK-NEXT: [[XOR2:%.*]] = xor i16 [[B2]], [[A2]] +; CHECK-NEXT: [[OR0:%.*]] = or i16 [[XOR0]], [[XOR1]] +; CHECK-NEXT: [[OR1:%.*]] = or i16 [[OR0]], [[XOR2]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[OR1]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %xor0 = xor i16 %b0, %a0 + %xor1 = xor i16 %b1, %a1 + %xor2 = xor i16 %b2, %a2 + %or0 = or i16 %xor0, %xor1 + %or1 = or i16 %or0, %xor2 + %r = icmp eq i16 %or1, 0 + ret i1 %r +} + +define i1 @bcmp_i128(i128 %a0, i128 %b0, i128 %a1, i128 %b1, i128 %a2, i128 %b2) { +; CHECK-LABEL: define i1 @bcmp_i128( +; CHECK-SAME: i128 [[A0:%.*]], i128 
[[B0:%.*]], i128 [[A1:%.*]], i128 [[B1:%.*]], i128 [[A2:%.*]], i128 [[B2:%.*]]) { +; CHECK-NEXT: [[XOR0:%.*]] = xor i128 [[B0]], [[A0]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i128 [[B1]], [[A1]] +; CHECK-NEXT: [[XOR2:%.*]] = xor i128 [[B2]], [[A2]] +; CHECK-NEXT: [[OR0:%.*]] = or i128 [[XOR0]], [[XOR1]] +; CHECK-NEXT: [[OR1:%.*]] = or i128 [[OR0]], [[XOR2]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i128 [[OR1]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %xor0 = xor i128 %b0, %a0 + %xor1 = xor i128 %b1, %a1 + %xor2 = xor i128 %b2, %a2 + %or0 = or i128 %xor0, %xor1 + %or1 = or i128 %or0, %xor2 + %r = icmp ne i128 %or1, 0 + ret i1 %r +} + +define i1 @bcmp_i42(i42 %a0, i42 %b0, i42 %a1, i42 %b1, i42 %a2, i42 %b2) { +; CHECK-LABEL: define i1 @bcmp_i42( +; CHECK-SAME: i42 [[A0:%.*]], i42 [[B0:%.*]], i42 [[A1:%.*]], i42 [[B1:%.*]], i42 [[A2:%.*]], i42 [[B2:%.*]]) { +; CHECK-NEXT: [[XOR0:%.*]] = xor i42 [[B0]], [[A0]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i42 [[B1]], [[A1]] +; CHECK-NEXT: [[XOR2:%.*]] = xor i42 [[B2]], [[A2]] +; CHECK-NEXT: [[OR0:%.*]] = or i42 [[XOR0]], [[XOR1]] +; CHECK-NEXT: [[OR1:%.*]] = or i42 [[OR0]], [[XOR2]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i42 [[OR1]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %xor0 = xor i42 %b0, %a0 + %xor1 = xor i42 %b1, %a1 + %xor2 = xor i42 %b2, %a2 + %or0 = or i42 %xor0, %xor1 + %or1 = or i42 %or0, %xor2 + %r = icmp ne i42 %or1, 0 + ret i1 %r +} diff --git a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp-extra.ll b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp-extra.ll new file mode 100644 index 0000000000000..e9573816c9788 --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp-extra.ll @@ -0,0 +1,3434 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=expand-memcmp < %s -mtriple=aarch64-unknown-unknown | FileCheck %s + +@.str = private constant [513 x i8] 
c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1 + +declare dso_local i32 @memcmp(ptr, ptr, i64) + +define i32 @length0(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length0( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret i32 0 +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind + ret i32 %m + } + +define i1 @length0_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length0_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i1 true +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length0_lt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length0_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i1 false +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length2(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length2( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: ret i32 [[TMP7]] +; 
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + ret i32 %m +} + +define i32 @length2_const(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length2_const( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; CHECK-NEXT: ret i32 [[TMP4]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind + ret i32 %m +} + +define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length2_gt_const( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length2_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length2_lt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length2_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; 
CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_gt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length2_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length2_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length2_eq_nobuiltin_attr( +; CHECK-SAME: 
ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length3(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length3( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i24, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i24, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i24 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = zext i24 [[TMP2]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: ret i32 [[TMP11]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind + ret i32 %m +} + +define i1 @length3_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length3_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; CHECK-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; 
CHECK-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length4(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length4( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; CHECK-NEXT: ret i32 [[TMP9]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + ret i32 %m +} + +define i1 @length4_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length4_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: ret i1 [[TMP3]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length4_lt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length4_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 
@llvm.bswap.i32(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret i1 [[TMP5]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length4_lt_32(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length4_lt_32( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: ret i32 [[TMP6]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = lshr i32 %m, 31 + ret i32 %c +} + +define i1 @length4_gt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length4_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret i1 [[TMP5]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length4_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length4_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 4) 
nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length5(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length5( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i40, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i40, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i40 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = zext i40 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: ret i32 [[TMP11]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + ret i32 %m +} + +define i1 @length5_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length5_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + %c = icmp ne i32 %m, 0 + 
ret i1 %c +} + +define i1 @length5_lt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length5_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i40, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i40, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i40 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = zext i40 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: ret i1 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length6(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length6( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i48, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i48, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i48 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = zext i48 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: ret i32 [[TMP11]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind + ret i32 %m +} + +define i32 @length6_lt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length6_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i48, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i48, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i48 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = 
zext i48 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; CHECK-NEXT: ret i32 [[TMP8]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind + %r = lshr i32 %m, 31 + ret i32 %r +} + +define i32 @length7(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length7( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: 
[[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind + ret i32 %m +} + +define i1 @length7_lt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length7_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind + %c = icmp 
slt i32 %m, 0 + ret i1 %c +} + +define i1 @length7_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length7_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: ret i1 [[TMP10]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length8(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length8( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; CHECK-NEXT: ret i32 [[TMP9]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind + ret i32 %m +} + +define i1 @length8_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length8_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: 
[[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length8_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length8_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length9(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length9( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP5:%.*]], [[TMP6:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; CHECK-NEXT: 
[[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind + ret i32 %m +} + +define i1 @length9_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length9_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length10(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length10( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label 
[[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP10]]) +; CHECK-NEXT: [[TMP13:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP11]]) +; CHECK-NEXT: [[TMP14]] = zext i16 [[TMP12]] to i64 +; CHECK-NEXT: [[TMP15]] = zext i16 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind + ret i32 %m +} + +define i1 @length10_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length10_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; 
CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length11(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length11( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] 
= phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind + ret i32 %m +} + +define i1 @length11_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length11_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length12_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length12_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = 
icmp ne i64 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length12(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length12( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; CHECK-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; CHECK-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret 
i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind + ret i32 %m +} + +define i1 @length13_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length13_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 13) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length14_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length14_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 
@memcmp(ptr %X, ptr %Y, i64 14) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length15(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length15( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind + ret i32 %m +} + +define i1 @length15_lt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length15_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; 
CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length15_const(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length15_const( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], 
[[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; CHECK-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; CHECK-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind + ret i32 %m +} + +define i1 @length15_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length15_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: 
[[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length15_gt_const( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; CHECK-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; CHECK-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + + +define i32 @length16(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length16( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; 
CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind + ret i32 %m +} + +define i1 @length16_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length16_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], 
i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: ret i1 [[TMP10]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length16_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: 
[[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length16_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; 
CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length16_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 3978425819141910832 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 3833745473465760056 +; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + + +define i32 @length24(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length24( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 
@llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind + ret i32 %m +} + +define i1 @length24_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length24_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], 
align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length24_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = 
getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length24_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br 
label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length24_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] 
= load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 3978425819141910832 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 3833745473465760056 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 3689065127958034230 +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i64 [[TMP10]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; CHECK-NEXT: ret i1 [[TMP11]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length31(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length31( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: 
loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; CHECK: loadbb3: +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 31) nounwind + ret i32 %m +} + +define i1 @length31_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length31_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP13]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i64 [[TMP21]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP23]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length31_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], 
[[LOADBB3:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; CHECK: loadbb3: +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr 
[[Y]], i64 23 +; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length31_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], 
i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; CHECK: loadbb3: +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { +; CHECK-LABEL: define i1 @length31_eq_prefer128( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP13]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i64 [[TMP21]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP23]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length31_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 3978425819141910832 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = 
load i64, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 3833745473465760056 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 3689065127958034230 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 3474870397276861491 +; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP8]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; CHECK-NEXT: ret i1 [[TMP15]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 31) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length32(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length32( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 
[[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; CHECK: loadbb3: +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind + ret i32 %m +} + + +define i1 @length32_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: 
define i1 @length32_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP13]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i64 [[TMP21]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP23]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length32_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], 
[[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; CHECK: loadbb3: +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr 
[[X]], i64 24 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length32_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr 
[[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; CHECK: loadbb3: +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { +; CHECK-LABEL: define 
i1 @length32_eq_prefer128( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP13]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i64 [[TMP21]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP23]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length32_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 3978425819141910832 +; CHECK-NEXT: [[TMP3:%.*]] = 
getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 3833745473465760056 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 3689065127958034230 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 3544395820347831604 +; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP8]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; CHECK-NEXT: ret i1 [[TMP15]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length48(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length48( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ [[TMP40:%.*]], [[LOADBB5:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; 
CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; CHECK: loadbb3: +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]] +; CHECK: loadbb4: +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; 
CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]]) +; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]]) +; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]] +; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]] +; CHECK: loadbb5: +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40 +; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1 +; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1 +; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]]) +; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]]) +; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]] +; CHECK-NEXT: br i1 [[TMP42]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB5]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 48) nounwind + ret i32 %m +} + +define i1 @length48_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length48_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], 
i64 16 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 1 +; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[X]], i64 40 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[Y]], i64 40 +; CHECK-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP24]], align 1 +; CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 1 +; CHECK-NEXT: [[TMP28:%.*]] = xor i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP13]], [[TMP18]] +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP23]], [[TMP28]] +; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP32]], [[TMP31]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i64 [[TMP33]], 0 +; CHECK-NEXT: [[TMP35:%.*]] = zext i1 [[TMP34]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP35]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length48_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: 
[[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ [[TMP40:%.*]], [[LOADBB5:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 
[[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; CHECK: loadbb3: +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]] +; CHECK: loadbb4: +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]]) +; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]]) +; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]] +; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]] +; CHECK: loadbb5: +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40 +; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1 +; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1 +; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]]) +; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]]) +; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]] +; CHECK-NEXT: br i1 [[TMP42]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB5]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + 
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length48_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ [[TMP40:%.*]], [[LOADBB5:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr 
[[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; CHECK: loadbb3: +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]] +; CHECK: loadbb4: +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]]) +; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]]) +; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]] +; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]] +; CHECK: loadbb5: +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40 +; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1 +; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1 +; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]]) +; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]]) +; 
CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]] +; CHECK-NEXT: br i1 [[TMP42]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB5]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { +; CHECK-LABEL: define i1 @length48_eq_prefer128( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] 
= load i64, ptr [[TMP20]], align 1 +; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[X]], i64 40 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[Y]], i64 40 +; CHECK-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP24]], align 1 +; CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 1 +; CHECK-NEXT: [[TMP28:%.*]] = xor i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP13]], [[TMP18]] +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP23]], [[TMP28]] +; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP32]], [[TMP31]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i64 [[TMP33]], 0 +; CHECK-NEXT: [[TMP35:%.*]] = zext i1 [[TMP34]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP35]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length48_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 3978425819141910832 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 3833745473465760056 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 3689065127958034230 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 3544395820347831604 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr 
[[TMP12]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 4123106164818064178 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 40 +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 3978425819141910832 +; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP8]], [[TMP11]] +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP14]], [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP21]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i64 [[TMP22]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = zext i1 [[TMP23]] to i32 +; CHECK-NEXT: ret i1 [[TMP23]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 48) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length63(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length63( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ [[TMP40:%.*]], [[LOADBB5:%.*]] ], [ [[TMP47:%.*]], [[LOADBB6:%.*]] ], [ [[TMP54:%.*]], [[LOADBB7:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ], [ [[TMP48:%.*]], [[LOADBB6]] ], [ [[TMP55:%.*]], [[LOADBB7]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = 
call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; CHECK: loadbb3: +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]] +; CHECK: loadbb4: +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; CHECK-NEXT: [[TMP30:%.*]] = 
getelementptr i8, ptr [[Y]], i64 32 +; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]]) +; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]]) +; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]] +; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]] +; CHECK: loadbb5: +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40 +; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1 +; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1 +; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]]) +; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]]) +; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]] +; CHECK-NEXT: br i1 [[TMP42]], label [[LOADBB6]], label [[RES_BLOCK]] +; CHECK: loadbb6: +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[Y]], i64 48 +; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[TMP43]], align 1 +; CHECK-NEXT: [[TMP46:%.*]] = load i64, ptr [[TMP44]], align 1 +; CHECK-NEXT: [[TMP47]] = call i64 @llvm.bswap.i64(i64 [[TMP45]]) +; CHECK-NEXT: [[TMP48]] = call i64 @llvm.bswap.i64(i64 [[TMP46]]) +; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[TMP47]], [[TMP48]] +; CHECK-NEXT: br i1 [[TMP49]], label [[LOADBB7]], label [[RES_BLOCK]] +; CHECK: loadbb7: +; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[X]], i64 55 +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[Y]], i64 55 +; CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr [[TMP50]], align 1 +; CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr [[TMP51]], align 1 +; CHECK-NEXT: [[TMP54]] = call i64 @llvm.bswap.i64(i64 [[TMP52]]) +; CHECK-NEXT: [[TMP55]] = call i64 @llvm.bswap.i64(i64 [[TMP53]]) +; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i64 
[[TMP54]], [[TMP55]] +; CHECK-NEXT: br i1 [[TMP56]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB7]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 63) nounwind + ret i32 %m +} + +define i1 @length63_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length63_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 1 +; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[X]], i64 40 +; 
CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[Y]], i64 40 +; CHECK-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP24]], align 1 +; CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 1 +; CHECK-NEXT: [[TMP28:%.*]] = xor i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 48 +; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP33:%.*]] = xor i64 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[X]], i64 55 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[Y]], i64 55 +; CHECK-NEXT: [[TMP36:%.*]] = load i64, ptr [[TMP34]], align 1 +; CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[TMP35]], align 1 +; CHECK-NEXT: [[TMP38:%.*]] = xor i64 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP13]], [[TMP18]] +; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP23]], [[TMP28]] +; CHECK-NEXT: [[TMP42:%.*]] = or i64 [[TMP33]], [[TMP38]] +; CHECK-NEXT: [[TMP43:%.*]] = or i64 [[TMP39]], [[TMP40]] +; CHECK-NEXT: [[TMP44:%.*]] = or i64 [[TMP41]], [[TMP42]] +; CHECK-NEXT: [[TMP45:%.*]] = or i64 [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = icmp ne i64 [[TMP45]], 0 +; CHECK-NEXT: [[TMP47:%.*]] = zext i1 [[TMP46]] to i32 +; CHECK-NEXT: ret i1 [[TMP46]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length63_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ 
[[TMP40:%.*]], [[LOADBB5:%.*]] ], [ [[TMP47:%.*]], [[LOADBB6:%.*]] ], [ [[TMP54:%.*]], [[LOADBB7:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ], [ [[TMP48:%.*]], [[LOADBB6]] ], [ [[TMP55:%.*]], [[LOADBB7]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] 
+; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; CHECK: loadbb3: +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]] +; CHECK: loadbb4: +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]]) +; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]]) +; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]] +; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]] +; CHECK: loadbb5: +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40 +; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1 +; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1 +; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]]) +; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]]) +; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]] +; CHECK-NEXT: br i1 [[TMP42]], label [[LOADBB6]], label [[RES_BLOCK]] +; CHECK: loadbb6: +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[Y]], i64 48 +; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[TMP43]], align 1 +; CHECK-NEXT: [[TMP46:%.*]] = load i64, ptr [[TMP44]], align 
1 +; CHECK-NEXT: [[TMP47]] = call i64 @llvm.bswap.i64(i64 [[TMP45]]) +; CHECK-NEXT: [[TMP48]] = call i64 @llvm.bswap.i64(i64 [[TMP46]]) +; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[TMP47]], [[TMP48]] +; CHECK-NEXT: br i1 [[TMP49]], label [[LOADBB7]], label [[RES_BLOCK]] +; CHECK: loadbb7: +; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[X]], i64 55 +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[Y]], i64 55 +; CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr [[TMP50]], align 1 +; CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr [[TMP51]], align 1 +; CHECK-NEXT: [[TMP54]] = call i64 @llvm.bswap.i64(i64 [[TMP52]]) +; CHECK-NEXT: [[TMP55]] = call i64 @llvm.bswap.i64(i64 [[TMP53]]) +; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i64 [[TMP54]], [[TMP55]] +; CHECK-NEXT: br i1 [[TMP56]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB7]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length63_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ [[TMP40:%.*]], [[LOADBB5:%.*]] ], [ [[TMP47:%.*]], [[LOADBB6:%.*]] ], [ [[TMP54:%.*]], [[LOADBB7:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ], [ [[TMP48:%.*]], [[LOADBB6]] ], [ [[TMP55:%.*]], [[LOADBB7]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp 
ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; CHECK: loadbb3: +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; CHECK-NEXT: 
[[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]] +; CHECK: loadbb4: +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]]) +; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]]) +; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]] +; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]] +; CHECK: loadbb5: +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40 +; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1 +; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1 +; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]]) +; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]]) +; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]] +; CHECK-NEXT: br i1 [[TMP42]], label [[LOADBB6]], label [[RES_BLOCK]] +; CHECK: loadbb6: +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[Y]], i64 48 +; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[TMP43]], align 1 +; CHECK-NEXT: [[TMP46:%.*]] = load i64, ptr [[TMP44]], align 1 +; CHECK-NEXT: [[TMP47]] = call i64 @llvm.bswap.i64(i64 [[TMP45]]) +; CHECK-NEXT: [[TMP48]] = call i64 @llvm.bswap.i64(i64 [[TMP46]]) +; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[TMP47]], [[TMP48]] +; CHECK-NEXT: br i1 [[TMP49]], label [[LOADBB7]], label [[RES_BLOCK]] +; CHECK: loadbb7: +; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[X]], i64 55 +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[Y]], i64 55 +; 
CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr [[TMP50]], align 1 +; CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr [[TMP51]], align 1 +; CHECK-NEXT: [[TMP54]] = call i64 @llvm.bswap.i64(i64 [[TMP52]]) +; CHECK-NEXT: [[TMP55]] = call i64 @llvm.bswap.i64(i64 [[TMP53]]) +; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i64 [[TMP54]], [[TMP55]] +; CHECK-NEXT: br i1 [[TMP56]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB7]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length63_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 3978425819141910832 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 3833745473465760056 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 3689065127958034230 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 3544395820347831604 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP12]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 4123106164818064178 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 40 +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 3978425819141910832 +; CHECK-NEXT: [[TMP18:%.*]] = 
getelementptr i8, ptr [[X]], i64 48 +; CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP18]], align 1 +; CHECK-NEXT: [[TMP20:%.*]] = xor i64 [[TMP19]], 3833745473465760056 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[X]], i64 55 +; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP21]], align 1 +; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP22]], 3616724998069630517 +; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP8]], [[TMP11]] +; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP14]], [[TMP17]] +; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP20]], [[TMP23]] +; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp ne i64 [[TMP30]], 0 +; CHECK-NEXT: [[TMP32:%.*]] = zext i1 [[TMP31]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP32]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 63) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length64(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length64( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ [[TMP40:%.*]], [[LOADBB5:%.*]] ], [ [[TMP47:%.*]], [[LOADBB6:%.*]] ], [ [[TMP54:%.*]], [[LOADBB7:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ], [ [[TMP48:%.*]], [[LOADBB6]] ], [ [[TMP55:%.*]], [[LOADBB7]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: 
[[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; CHECK: loadbb3: +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) 
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]] +; CHECK: loadbb4: +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]]) +; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]]) +; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]] +; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]] +; CHECK: loadbb5: +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40 +; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1 +; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1 +; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]]) +; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]]) +; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]] +; CHECK-NEXT: br i1 [[TMP42]], label [[LOADBB6]], label [[RES_BLOCK]] +; CHECK: loadbb6: +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[Y]], i64 48 +; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[TMP43]], align 1 +; CHECK-NEXT: [[TMP46:%.*]] = load i64, ptr [[TMP44]], align 1 +; CHECK-NEXT: [[TMP47]] = call i64 @llvm.bswap.i64(i64 [[TMP45]]) +; CHECK-NEXT: [[TMP48]] = call i64 @llvm.bswap.i64(i64 [[TMP46]]) +; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[TMP47]], [[TMP48]] +; CHECK-NEXT: br i1 [[TMP49]], label [[LOADBB7]], label [[RES_BLOCK]] +; CHECK: loadbb7: +; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[X]], i64 56 +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[Y]], i64 56 +; CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr [[TMP50]], align 
1 +; CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr [[TMP51]], align 1 +; CHECK-NEXT: [[TMP54]] = call i64 @llvm.bswap.i64(i64 [[TMP52]]) +; CHECK-NEXT: [[TMP55]] = call i64 @llvm.bswap.i64(i64 [[TMP53]]) +; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i64 [[TMP54]], [[TMP55]] +; CHECK-NEXT: br i1 [[TMP56]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB7]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind + ret i32 %m +} + +define i1 @length64_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length64_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; CHECK-NEXT: 
[[TMP21:%.*]] = load i64, ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 1 +; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[X]], i64 40 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[Y]], i64 40 +; CHECK-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP24]], align 1 +; CHECK-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 1 +; CHECK-NEXT: [[TMP28:%.*]] = xor i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 48 +; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP33:%.*]] = xor i64 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[X]], i64 56 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[Y]], i64 56 +; CHECK-NEXT: [[TMP36:%.*]] = load i64, ptr [[TMP34]], align 1 +; CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[TMP35]], align 1 +; CHECK-NEXT: [[TMP38:%.*]] = xor i64 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP13]], [[TMP18]] +; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP23]], [[TMP28]] +; CHECK-NEXT: [[TMP42:%.*]] = or i64 [[TMP33]], [[TMP38]] +; CHECK-NEXT: [[TMP43:%.*]] = or i64 [[TMP39]], [[TMP40]] +; CHECK-NEXT: [[TMP44:%.*]] = or i64 [[TMP41]], [[TMP42]] +; CHECK-NEXT: [[TMP45:%.*]] = or i64 [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = icmp ne i64 [[TMP45]], 0 +; CHECK-NEXT: [[TMP47:%.*]] = zext i1 [[TMP46]] to i32 +; CHECK-NEXT: ret i1 [[TMP46]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length64_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] 
+; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ [[TMP40:%.*]], [[LOADBB5:%.*]] ], [ [[TMP47:%.*]], [[LOADBB6:%.*]] ], [ [[TMP54:%.*]], [[LOADBB7:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ], [ [[TMP48:%.*]], [[LOADBB6]] ], [ [[TMP55:%.*]], [[LOADBB7]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: 
[[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; CHECK: loadbb3: +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]] +; CHECK: loadbb4: +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]]) +; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]]) +; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]] +; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]] +; CHECK: loadbb5: +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40 +; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1 +; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1 +; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]]) +; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]]) +; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]] +; CHECK-NEXT: br i1 [[TMP42]], label [[LOADBB6]], label [[RES_BLOCK]] +; CHECK: loadbb6: +; 
CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[Y]], i64 48 +; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[TMP43]], align 1 +; CHECK-NEXT: [[TMP46:%.*]] = load i64, ptr [[TMP44]], align 1 +; CHECK-NEXT: [[TMP47]] = call i64 @llvm.bswap.i64(i64 [[TMP45]]) +; CHECK-NEXT: [[TMP48]] = call i64 @llvm.bswap.i64(i64 [[TMP46]]) +; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[TMP47]], [[TMP48]] +; CHECK-NEXT: br i1 [[TMP49]], label [[LOADBB7]], label [[RES_BLOCK]] +; CHECK: loadbb7: +; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[X]], i64 56 +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[Y]], i64 56 +; CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr [[TMP50]], align 1 +; CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr [[TMP51]], align 1 +; CHECK-NEXT: [[TMP54]] = call i64 @llvm.bswap.i64(i64 [[TMP52]]) +; CHECK-NEXT: [[TMP55]] = call i64 @llvm.bswap.i64(i64 [[TMP53]]) +; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i64 [[TMP54]], [[TMP55]] +; CHECK-NEXT: br i1 [[TMP56]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB7]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length64_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ], [ [[TMP33:%.*]], [[LOADBB4:%.*]] ], [ [[TMP40:%.*]], [[LOADBB5:%.*]] ], [ [[TMP47:%.*]], [[LOADBB6:%.*]] ], [ [[TMP54:%.*]], [[LOADBB7:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ 
[[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ], [ [[TMP34:%.*]], [[LOADBB4]] ], [ [[TMP41:%.*]], [[LOADBB5]] ], [ [[TMP48:%.*]], [[LOADBB6]] ], [ [[TMP55:%.*]], [[LOADBB7]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; CHECK-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; CHECK: loadbb3: +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: 
[[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; CHECK-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: br i1 [[TMP28]], label [[LOADBB4]], label [[RES_BLOCK]] +; CHECK: loadbb4: +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; CHECK-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP33]] = call i64 @llvm.bswap.i64(i64 [[TMP31]]) +; CHECK-NEXT: [[TMP34]] = call i64 @llvm.bswap.i64(i64 [[TMP32]]) +; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[TMP33]], [[TMP34]] +; CHECK-NEXT: br i1 [[TMP35]], label [[LOADBB5]], label [[RES_BLOCK]] +; CHECK: loadbb5: +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[X]], i64 40 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[Y]], i64 40 +; CHECK-NEXT: [[TMP38:%.*]] = load i64, ptr [[TMP36]], align 1 +; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP37]], align 1 +; CHECK-NEXT: [[TMP40]] = call i64 @llvm.bswap.i64(i64 [[TMP38]]) +; CHECK-NEXT: [[TMP41]] = call i64 @llvm.bswap.i64(i64 [[TMP39]]) +; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP40]], [[TMP41]] +; CHECK-NEXT: br i1 [[TMP42]], label [[LOADBB6]], label [[RES_BLOCK]] +; CHECK: loadbb6: +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[Y]], i64 48 +; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[TMP43]], align 1 +; CHECK-NEXT: [[TMP46:%.*]] = load i64, ptr [[TMP44]], align 1 +; CHECK-NEXT: [[TMP47]] = call i64 @llvm.bswap.i64(i64 [[TMP45]]) +; CHECK-NEXT: [[TMP48]] = call i64 @llvm.bswap.i64(i64 [[TMP46]]) +; CHECK-NEXT: [[TMP49:%.*]] = icmp 
eq i64 [[TMP47]], [[TMP48]] +; CHECK-NEXT: br i1 [[TMP49]], label [[LOADBB7]], label [[RES_BLOCK]] +; CHECK: loadbb7: +; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[X]], i64 56 +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[Y]], i64 56 +; CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr [[TMP50]], align 1 +; CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr [[TMP51]], align 1 +; CHECK-NEXT: [[TMP54]] = call i64 @llvm.bswap.i64(i64 [[TMP52]]) +; CHECK-NEXT: [[TMP55]] = call i64 @llvm.bswap.i64(i64 [[TMP53]]) +; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i64 [[TMP54]], [[TMP55]] +; CHECK-NEXT: br i1 [[TMP56]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB7]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length64_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 3978425819141910832 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 3833745473465760056 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 3689065127958034230 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 3544395820347831604 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP12]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 
4123106164818064178 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 40 +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 3978425819141910832 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP18]], align 1 +; CHECK-NEXT: [[TMP20:%.*]] = xor i64 [[TMP19]], 3833745473465760056 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[X]], i64 56 +; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP21]], align 1 +; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP22]], 3689065127958034230 +; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP8]], [[TMP11]] +; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP14]], [[TMP17]] +; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP20]], [[TMP23]] +; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp ne i64 [[TMP30]], 0 +; CHECK-NEXT: [[TMP32:%.*]] = zext i1 [[TMP31]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP32]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length96(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length96( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]] +; CHECK-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 96) nounwind + ret i32 %m +} + +define i1 @length96_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length96_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 
[[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length96_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length96_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length96_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 96) #[[ATTR0]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 96) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length127(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length127( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]] +; CHECK-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 127) nounwind + ret i32 %m +} + +define i1 @length127_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length127_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr 
[[Y]], i64 127) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length127_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length127_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length127_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 127) #[[ATTR0]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 127) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length128(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length128( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]] +; CHECK-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 128) nounwind + ret i32 %m +} + +define i1 @length128_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length128_eq( +; 
CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length128_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length128_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length128_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 128) #[[ATTR0]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 128) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length192(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length192( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]] +; CHECK-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 192) nounwind + 
ret i32 %m +} + +define i1 @length192_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length192_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length192_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length192_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length192_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR0]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 192) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length255(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length255( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 
255) #[[ATTR0]] +; CHECK-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 255) nounwind + ret i32 %m +} + +define i1 @length255_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length255_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length255_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length255_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length255_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR0]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 255) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length256(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length256( +; CHECK-SAME: ptr 
[[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]] +; CHECK-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 256) nounwind + ret i32 %m +} + +define i1 @length256_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length256_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length256_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length256_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length256_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR0]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 256) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c 
+} + +define i32 @length384(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length384( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]] +; CHECK-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 384) nounwind + ret i32 %m +} + +define i1 @length384_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length384_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length384_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length384_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length384_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR0]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: ret i1 
[[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 384) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length511(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length511( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]] +; CHECK-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 511) nounwind + ret i32 %m +} + +define i1 @length511_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length511_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length511_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length511_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_eq_const(ptr %X) nounwind { +; CHECK-LABEL: define i1 @length511_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 
@memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR0]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 511) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length512(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @length512( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]] +; CHECK-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 512) nounwind + ret i32 %m +} + +define i1 @length512_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length512_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length512_lt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: define i1 @length512_gt( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_eq_const(ptr %X) nounwind { +; CHECK-LABEL: 
define i1 @length512_eq_const( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR0]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 512) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @huge_length(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i32 @huge_length( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR0]] +; CHECK-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind + ret i32 %m +} + +define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @huge_length_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR0]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) nounwind { +; CHECK-LABEL: define i32 @nonconst_length( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR0]] +; CHECK-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind + ret i32 %m +} + +define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) nounwind { +; CHECK-LABEL: define i1 @nonconst_length_eq( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR0]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr 
%X, ptr %Y, i64 %size) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} diff --git a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll index 92439691e1873..735fb27da1606 100644 --- a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll +++ b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=aarch64-unknown-unknown < %s | FileCheck %s ; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=aarch64-unknown-unknown < %s | FileCheck %s declare i32 @memcmp(ptr nocapture, ptr nocapture, i64) diff --git a/llvm/test/Transforms/ExpandMemCmp/BPF/lit.local.cfg b/llvm/test/Transforms/ExpandMemCmp/BPF/lit.local.cfg new file mode 100644 index 0000000000000..d1828f2b613d9 --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/BPF/lit.local.cfg @@ -0,0 +1,4 @@ +if not "BPF" in config.root.targets: + config.unsupported = True +if "system-aix" in config.available_features: + config.unsupported = True diff --git a/llvm/test/Transforms/ExpandMemCmp/BPF/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/BPF/memcmp.ll new file mode 100644 index 0000000000000..1accfe88d1a82 --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/BPF/memcmp.ll @@ -0,0 +1,119 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=expand-memcmp -mtriple=bpf < %s | FileCheck %s --check-prefix=BPF +; RUN: opt -S -passes=expand-memcmp -mtriple=bpf -mcpu=v3 < %s | FileCheck %s --check-prefix=BPF-V3 +; +; Source code: +; /* set aligned 4 to minimize the number of loads */ +; struct build_id { +; unsigned char id[20]; +; } __attribute__((aligned(4))); +; +; /* try to compute a local build_id */ +; void bar1(ptr); +; +; /* the global build_id to compare */ +; struct build_id id2; +; +; int foo() +; { +; 
struct build_id id1; +; +; bar1(&id1); +; return __builtin_memcmp(&id1, &id2, sizeof(id1)) == 0; +; } +; Compilation flags: +; clang -target bpf -S -O2 t.c -emit-llvm + +%struct.build_id = type { [20 x i8] } + +@id2 = dso_local global %struct.build_id zeroinitializer, align 4 + +; Function Attrs: noinline nounwind +define dso_local i32 @foo() #0 { +; BPF-LABEL: define dso_local i32 @foo( +; BPF-SAME: ) #[[ATTR0:[0-9]+]] { +; BPF-NEXT: entry: +; BPF-NEXT: [[ID1:%.*]] = alloca [[STRUCT_BUILD_ID:%.*]], align 4 +; BPF-NEXT: call void @bar1(ptr noundef [[ID1]]) +; BPF-NEXT: br label [[LOADBB:%.*]] +; BPF: res_block: +; BPF-NEXT: br label [[ENDBLOCK:%.*]] +; BPF: loadbb: +; BPF-NEXT: [[TMP0:%.*]] = load i64, ptr [[ID1]], align 4 +; BPF-NEXT: [[TMP1:%.*]] = load i64, ptr @id2, align 4 +; BPF-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP0]], [[TMP1]] +; BPF-NEXT: br i1 [[TMP2]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; BPF: loadbb1: +; BPF-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[ID1]], i64 8 +; BPF-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 4 +; BPF-NEXT: [[TMP5:%.*]] = load i64, ptr getelementptr (i8, ptr @id2, i64 8), align 4 +; BPF-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]] +; BPF-NEXT: br i1 [[TMP6]], label [[RES_BLOCK]], label [[LOADBB2:%.*]] +; BPF: loadbb2: +; BPF-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[ID1]], i64 16 +; BPF-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +; BPF-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr (i8, ptr @id2, i64 16), align 4 +; BPF-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP8]], [[TMP9]] +; BPF-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; BPF: endblock: +; BPF-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ] +; BPF-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; BPF-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; BPF-NEXT: ret i32 [[CONV]] +; +; BPF-V3-LABEL: define dso_local i32 @foo( +; BPF-V3-SAME: ) #[[ATTR0:[0-9]+]] { +; BPF-V3-NEXT: entry: +; 
BPF-V3-NEXT: [[ID1:%.*]] = alloca [[STRUCT_BUILD_ID:%.*]], align 4 +; BPF-V3-NEXT: call void @bar1(ptr noundef [[ID1]]) +; BPF-V3-NEXT: br label [[LOADBB:%.*]] +; BPF-V3: res_block: +; BPF-V3-NEXT: br label [[ENDBLOCK:%.*]] +; BPF-V3: loadbb: +; BPF-V3-NEXT: [[TMP0:%.*]] = load i64, ptr [[ID1]], align 4 +; BPF-V3-NEXT: [[TMP1:%.*]] = load i64, ptr @id2, align 4 +; BPF-V3-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP0]], [[TMP1]] +; BPF-V3-NEXT: br i1 [[TMP2]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; BPF-V3: loadbb1: +; BPF-V3-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[ID1]], i64 8 +; BPF-V3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 4 +; BPF-V3-NEXT: [[TMP5:%.*]] = load i64, ptr getelementptr (i8, ptr @id2, i64 8), align 4 +; BPF-V3-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]] +; BPF-V3-NEXT: br i1 [[TMP6]], label [[RES_BLOCK]], label [[LOADBB2:%.*]] +; BPF-V3: loadbb2: +; BPF-V3-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[ID1]], i64 16 +; BPF-V3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +; BPF-V3-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr (i8, ptr @id2, i64 16), align 4 +; BPF-V3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP8]], [[TMP9]] +; BPF-V3-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; BPF-V3: endblock: +; BPF-V3-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ] +; BPF-V3-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; BPF-V3-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; BPF-V3-NEXT: ret i32 [[CONV]] +; +entry: + %id1 = alloca %struct.build_id, align 4 + call void @bar1(ptr noundef %id1) + %call = call i32 @memcmp(ptr noundef %id1, ptr noundef @id2, i64 noundef 20) #3 + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +declare dso_local void @bar1(ptr noundef) #1 + +; Function Attrs: nounwind +declare dso_local i32 @memcmp(ptr noundef, ptr noundef, i64 noundef) #2 + +attributes #0 = { noinline nounwind "frame-pointer"="all" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" } +attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #2 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang version 18.0.0git (git@github.com:llvm/llvm-project.git a776740d6296520b8bde156aa3f8d9ecb32cddd9)"} diff --git a/llvm/test/Transforms/ExpandMemCmp/PowerPC/lit.local.cfg b/llvm/test/Transforms/ExpandMemCmp/PowerPC/lit.local.cfg new file mode 100644 index 0000000000000..bb982488eb15e --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/PowerPC/lit.local.cfg @@ -0,0 +1,2 @@ +if not "PowerPC" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/ExpandMemCmp/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/Transforms/ExpandMemCmp/PowerPC/memCmpUsedInZeroEqualityComparison.ll new file mode 100644 index 0000000000000..9a75b147e7e1f --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/PowerPC/memCmpUsedInZeroEqualityComparison.ll @@ -0,0 +1,218 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=expand-memcmp -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +@zeroEqualityTest01.buffer1 = private unnamed_addr constant [3 x i32] [i32 1, i32 2, i32 4], align 4 +@zeroEqualityTest01.buffer2 = private unnamed_addr constant [3 x i32] [i32 1, i32 2, i32 3], align 4 +@zeroEqualityTest02.buffer1 = private unnamed_addr constant [4 x i32] [i32 4, i32 0, i32 0, i32 0], align 4 +@zeroEqualityTest02.buffer2 = private unnamed_addr constant [4 x i32] [i32 3, i32 0, i32 0, i32 0], align 4 +@zeroEqualityTest03.buffer1 = private unnamed_addr 
constant [4 x i32] [i32 0, i32 0, i32 0, i32 3], align 4 +@zeroEqualityTest03.buffer2 = private unnamed_addr constant [4 x i32] [i32 0, i32 0, i32 0, i32 4], align 4 +@zeroEqualityTest04.buffer1 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14], align 4 +@zeroEqualityTest04.buffer2 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 13], align 4 + +declare signext i32 @memcmp(ptr nocapture, ptr nocapture, i64) local_unnamed_addr #1 + +; Check 4 bytes - requires 1 load for each param. +define signext i32 @zeroEqualityTest02(ptr %x, ptr %y) { +; CHECK-LABEL: define signext i32 @zeroEqualityTest02( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[DOT:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: ret i32 [[DOT]] +; + %call = tail call signext i32 @memcmp(ptr %x, ptr %y, i64 4) + %not.cmp = icmp ne i32 %call, 0 + %. = zext i1 %not.cmp to i32 + ret i32 %. +} + +; Check 16 bytes - requires 2 loads for each param (or use vectors?). 
+define signext i32 @zeroEqualityTest01(ptr %x, ptr %y) { +; CHECK-LABEL: define signext i32 @zeroEqualityTest01( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[NOT_TOBOOL:%.*]] = icmp ne i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[DOT:%.*]] = zext i1 [[NOT_TOBOOL]] to i32 +; CHECK-NEXT: ret i32 [[DOT]] +; + %call = tail call signext i32 @memcmp(ptr %x, ptr %y, i64 16) + %not.tobool = icmp ne i32 %call, 0 + %. = zext i1 %not.tobool to i32 + ret i32 %. +} + +; Check 7 bytes - requires 3 loads for each param. 
+define signext i32 @zeroEqualityTest03(ptr %x, ptr %y) { +; CHECK-LABEL: define signext i32 @zeroEqualityTest03( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[LOADBB2:%.*]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i8 [[TMP11]], [[TMP12]] +; CHECK-NEXT: br i1 [[TMP13]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[NOT_LNOT:%.*]] = icmp ne i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[COND:%.*]] = zext i1 [[NOT_LNOT]] to i32 +; CHECK-NEXT: ret i32 [[COND]] +; + %call = tail call signext i32 @memcmp(ptr %x, ptr %y, i64 7) + %not.lnot = icmp ne i32 %call, 0 + %cond = zext i1 %not.lnot to i32 + ret i32 %cond +} + +; Validate with > 0 +define signext i32 @zeroEqualityTest04() { +; CHECK-LABEL: define signext i32 @zeroEqualityTest04( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; 
CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ 288230376151711744, [[LOADBB]] ], [ 0, [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 216172782113783808, [[LOADBB]] ], [ 0, [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: br i1 false, label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: br i1 true, label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[NOT_CMP:%.*]] = icmp slt i32 [[PHI_RES]], 1 +; CHECK-NEXT: [[DOT:%.*]] = zext i1 [[NOT_CMP]] to i32 +; CHECK-NEXT: ret i32 [[DOT]] +; + %call = tail call signext i32 @memcmp(ptr @zeroEqualityTest02.buffer1, ptr @zeroEqualityTest02.buffer2, i64 16) + %not.cmp = icmp slt i32 %call, 1 + %. = zext i1 %not.cmp to i32 + ret i32 %. +} + +; Validate with < 0 +define signext i32 @zeroEqualityTest05() { +; CHECK-LABEL: define signext i32 @zeroEqualityTest05( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ 0, [[LOADBB]] ], [ 50331648, [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 0, [[LOADBB]] ], [ 67108864, [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: br i1 true, label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: br i1 false, label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[CALL_LOBIT:%.*]] = lshr i32 [[PHI_RES]], 31 +; CHECK-NEXT: [[CALL_LOBIT_NOT:%.*]] = xor i32 [[CALL_LOBIT]], 1 +; CHECK-NEXT: ret 
i32 [[CALL_LOBIT_NOT]] +; + %call = tail call signext i32 @memcmp(ptr @zeroEqualityTest03.buffer1, ptr @zeroEqualityTest03.buffer2, i64 16) + %call.lobit = lshr i32 %call, 31 + %call.lobit.not = xor i32 %call.lobit, 1 + ret i32 %call.lobit.not +} + +; Validate with memcmp()?: +define signext i32 @equalityFoldTwoConstants() { +; CHECK-LABEL: define signext i32 @equalityFoldTwoConstants( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: br i1 false, label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: br i1 false, label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[NOT_TOBOOL:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[COND:%.*]] = zext i1 [[NOT_TOBOOL]] to i32 +; CHECK-NEXT: ret i32 [[COND]] +; + %call = tail call signext i32 @memcmp(ptr @zeroEqualityTest04.buffer1, ptr @zeroEqualityTest04.buffer2, i64 16) + %not.tobool = icmp eq i32 %call, 0 + %cond = zext i1 %not.tobool to i32 + ret i32 %cond +} + +define signext i32 @equalityFoldOneConstant(ptr %X) { +; CHECK-LABEL: define signext i32 @equalityFoldOneConstant( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 4294967296, [[TMP1]] +; CHECK-NEXT: br i1 [[TMP2]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 12884901890, [[TMP4]] +; CHECK-NEXT: br i1 [[TMP5]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, 
[[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[NOT_TOBOOL:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[COND:%.*]] = zext i1 [[NOT_TOBOOL]] to i32 +; CHECK-NEXT: ret i32 [[COND]] +; + %call = tail call signext i32 @memcmp(ptr @zeroEqualityTest04.buffer1, ptr %X, i64 16) + %not.tobool = icmp eq i32 %call, 0 + %cond = zext i1 %not.tobool to i32 + ret i32 %cond +} + +define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: define i1 @length2_eq_nobuiltin_attr( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[M:%.*]] = tail call signext i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call signext i32 @memcmp(ptr %X, ptr %Y, i64 2) nobuiltin + %c = icmp eq i32 %m, 0 + ret i1 %c +} + diff --git a/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmp-mergeexpand.ll b/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmp-mergeexpand.ll new file mode 100644 index 0000000000000..ffc49478cfa4d --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmp-mergeexpand.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=expand-memcmp -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s + +; This tests interaction between MergeICmp and expand-memcmp. 
+ +%"struct.std::pair" = type { i32, i32 } + +define zeroext i1 @opeq1( +; CHECK-LABEL: define zeroext i1 @opeq1( +; CHECK-SAME: ptr nocapture readonly dereferenceable(8) [[A:%.*]], ptr nocapture readonly dereferenceable(8) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]] +; CHECK: land.rhs.i: +; CHECK-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", ptr [[A]], i64 0, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SECOND_I]], align 4 +; CHECK-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", ptr [[B]], i64 0, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[SECOND2_I]], align 4 +; CHECK-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: br label [[OPEQ1_EXIT]] +; CHECK: opeq1.exit: +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ] +; CHECK-NEXT: ret i1 [[TMP4]] +; + ptr nocapture readonly dereferenceable(8) %a, + ptr nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { +entry: + %0 = load i32, ptr %a, align 4 + %1 = load i32, ptr %b, align 4 + %cmp.i = icmp eq i32 %0, %1 + br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit + +land.rhs.i: + %second.i = getelementptr inbounds %"struct.std::pair", ptr %a, i64 0, i32 1 + %2 = load i32, ptr %second.i, align 4 + %second2.i = getelementptr inbounds %"struct.std::pair", ptr %b, i64 0, i32 1 + %3 = load i32, ptr %second2.i, align 4 + %cmp3.i = icmp eq i32 %2, %3 + br label %opeq1.exit + +opeq1.exit: + %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ] + ret i1 %4 +} + + diff --git a/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmp.ll new file mode 100644 index 
0000000000000..21cdbd65544c4 --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmp.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=expand-memcmp -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s + +define signext i32 @memcmp8(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) { +; CHECK-LABEL: define signext i32 @memcmp8( +; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[BUFFER1]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[BUFFER2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; CHECK-NEXT: ret i32 [[TMP9]] +; + %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 8) + ret i32 %call +} + +define signext i32 @memcmp4(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) { +; CHECK-LABEL: define signext i32 @memcmp4( +; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[BUFFER1]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[BUFFER2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = zext 
i1 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; CHECK-NEXT: ret i32 [[TMP9]] +; + %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4) + ret i32 %call +} + +define signext i32 @memcmp2(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) { +; CHECK-LABEL: define signext i32 @memcmp2( +; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[BUFFER1]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[BUFFER2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: ret i32 [[TMP7]] +; + %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 2) + ret i32 %call +} + +define signext i32 @memcmp1(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) { +; CHECK-LABEL: define signext i32 @memcmp1( +; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[BUFFER1]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[BUFFER2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP2]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret i32 [[TMP5]] +; + %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 1) #2 + ret i32 %call +} + +declare signext i32 @memcmp(ptr, ptr, i64) diff --git a/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmpIR.ll b/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmpIR.ll new file mode 100644 index 0000000000000..3ad0c9d12ea0b --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/PowerPC/memcmpIR.ll @@ 
-0,0 +1,216 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=expand-memcmp -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s + +define signext i32 @test1(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) { +; CHECK-LABEL: define signext i32 @test1( +; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP11:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[BUFFER1]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[BUFFER2]], align 1 +; CHECK-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]] +; CHECK-NEXT: br i1 [[TMP6]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BUFFER1]], i64 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[BUFFER2]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11]] = call i64 @llvm.bswap.i64(i64 [[TMP9]]) +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[TMP11]], [[TMP12]] +; CHECK-NEXT: br i1 [[TMP13]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 
[[TMP1]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; +entry: + + + + ; CHECK-BE-LABEL: @test1( + ; CHECK-BE-LABEL: res_block:{{.*}} + ; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64 + ; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1 + ; CHECK-BE-NEXT: br label %endblock + + ; CHECK-BE-LABEL: loadbb:{{.*}} + ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, ptr + ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr + ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]] + ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block + + ; CHECK-BE-LABEL: loadbb1:{{.*}} + ; CHECK-BE-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, ptr {{.*}}, i64 8 + ; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, ptr {{.*}}, i64 8 + ; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, ptr [[GEP1]] + ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr [[GEP2]] + ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]] + ; CHECK-BE-NEXT: br i1 [[ICMP]], label %endblock, label %res_block + + %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 16) + ret i32 %call +} + +declare signext i32 @memcmp(ptr nocapture, ptr nocapture, i64) local_unnamed_addr #1 + +define signext i32 @test2(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) { +; CHECK-LABEL: define signext i32 @test2( +; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[BUFFER1]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[BUFFER2]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: 
[[TMP8:%.*]] = sub i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: ret i32 [[TMP8]] +; + + ; CHECK-BE-LABEL: @test2( + ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, ptr + ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr + ; CHECK-BE-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[LOAD1]], [[LOAD2]] + ; CHECK-BE-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[LOAD1]], [[LOAD2]] + ; CHECK-BE-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32 + ; CHECK-BE-NEXT: [[Z2:%[0-9]+]] = zext i1 [[CMP2]] to i32 + ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]] + ; CHECK-BE-NEXT: ret i32 [[SUB]] + +entry: + %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4) + ret i32 %call +} + +define signext i32 @test3(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) { +; CHECK-LABEL: define signext i32 @test3( +; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1:%.*]] ], [ [[TMP22:%.*]], [[LOADBB2:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1]] ], [ [[TMP23:%.*]], [[LOADBB2]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[BUFFER1]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[BUFFER2]], align 1 +; CHECK-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]] +; CHECK-NEXT: br i1 [[TMP6]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BUFFER1]], i64 8 +; CHECK-NEXT: [[TMP8:%.*]] = 
getelementptr i8, ptr [[BUFFER2]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP9]]) +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = zext i32 [[TMP11]] to i64 +; CHECK-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[TMP13]], [[TMP14]] +; CHECK-NEXT: br i1 [[TMP15]], label [[LOADBB2]], label [[RES_BLOCK]] +; CHECK: loadbb2: +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[BUFFER1]], i64 12 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[BUFFER2]], i64 12 +; CHECK-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 1 +; CHECK-NEXT: [[TMP20:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP18]]) +; CHECK-NEXT: [[TMP21:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP19]]) +; CHECK-NEXT: [[TMP22]] = zext i16 [[TMP20]] to i64 +; CHECK-NEXT: [[TMP23]] = zext i16 [[TMP21]] to i64 +; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[TMP22]], [[TMP23]] +; CHECK-NEXT: br i1 [[TMP24]], label [[LOADBB3:%.*]], label [[RES_BLOCK]] +; CHECK: loadbb3: +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[BUFFER1]], i64 14 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[BUFFER2]], i64 14 +; CHECK-NEXT: [[TMP27:%.*]] = load i8, ptr [[TMP25]], align 1 +; CHECK-NEXT: [[TMP28:%.*]] = load i8, ptr [[TMP26]], align 1 +; CHECK-NEXT: [[TMP29:%.*]] = zext i8 [[TMP27]] to i32 +; CHECK-NEXT: [[TMP30:%.*]] = zext i8 [[TMP28]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = sub i32 [[TMP29]], [[TMP30]] +; CHECK-NEXT: br label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP31]], [[LOADBB3]] ], [ [[TMP1]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + + + + + + ; CHECK-BE-LABEL: res_block:{{.*}} + ; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64 + ; CHECK-BE-NEXT: 
[[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1 + ; CHECK-BE-NEXT: br label %endblock + + ; CHECK-BE-LABEL: loadbb:{{.*}} + ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, ptr + ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr + ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]] + ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block + + ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, ptr + ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr + ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[LOAD1]] to i64 + ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[LOAD2]] to i64 + ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]] + ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb2, label %res_block + + ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i16, ptr + ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i16, ptr + ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[LOAD1]] to i64 + ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[LOAD2]] to i64 + ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]] + ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb3, label %res_block + + ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i8, ptr + ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i8, ptr + ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32 + ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i8 [[LOAD2]] to i32 + ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[ZEXT1]], [[ZEXT2]] + ; CHECK-BE-NEXT: br label %endblock + +entry: + %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 15) + ret i32 %call +} + ; CHECK-BE: call = tail call signext i32 @memcmp +define signext i32 @test4(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) { +; CHECK-LABEL: define signext i32 @test4( +; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = tail call signext i32 @memcmp(ptr [[BUFFER1]], ptr [[BUFFER2]], i64 65) +; CHECK-NEXT: ret i32 
[[CALL]] +; +entry: + %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 65) + ret i32 %call +} + +define signext i32 @test5(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2, i32 signext %SIZE) { +; CHECK-LABEL: define signext i32 @test5( +; CHECK-SAME: ptr nocapture readonly [[BUFFER1:%.*]], ptr nocapture readonly [[BUFFER2:%.*]], i32 signext [[SIZE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[SIZE]] to i64 +; CHECK-NEXT: [[CALL:%.*]] = tail call signext i32 @memcmp(ptr [[BUFFER1]], ptr [[BUFFER2]], i64 [[CONV]]) +; CHECK-NEXT: ret i32 [[CALL]] +; + ; CHECK-BE: call = tail call signext i32 @memcmp +entry: + %conv = sext i32 %SIZE to i64 + %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 %conv) + ret i32 %call +} diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll index 41d357728b93e..5877d00a818c5 100644 --- a/llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll +++ b/llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll @@ -1,16 +1,16 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 declare i32 @bcmp(ptr nocapture, ptr nocapture, i64) define i32 @bcmp8(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @bcmp8( -; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -; X64-NEXT: [[TMP6:%.*]] = zext i1 
[[TMP5]] to i32 -; X64-NEXT: ret i32 [[TMP6]] +; X64-LABEL: define i32 @bcmp8( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: ret i32 [[TMP4]] ; %call = tail call i32 @bcmp(ptr %x, ptr %y, i64 8) ret i32 %call diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-2.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-2.ll new file mode 100644 index 0000000000000..4424488a7fffb --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-2.ll @@ -0,0 +1,20249 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=X64 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=sse4.1 < %s | FileCheck %s --check-prefixes=X64-SSE41 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefixes=X64-AVX1 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx2 < %s | FileCheck %s --check-prefixes=X64-AVX2 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit < %s | FileCheck %s --check-prefixes=X64-AVX512BW-256 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit < %s | FileCheck %s --check-prefixes=X64-AVX512BW +; RUN: opt -S 
-passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,-prefer-mask-registers < %s | FileCheck %s --check-prefixes=X64-AVX512F-256 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,-prefer-mask-registers < %s | FileCheck %s --check-prefixes=X64-AVX512F +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,+prefer-mask-registers < %s | FileCheck %s --check-prefixes=X64-MIC-AVX2 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,+prefer-mask-registers < %s | FileCheck %s --check-prefixes=X64-MIC-AVX512F + +; This tests codegen time inlining/optimization of memcmp +; rdar://6480398 + +@.str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1 + +declare dso_local i32 @memcmp(ptr, ptr, i64) + +define i32 @length0(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length0( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0:[0-9]+]] { +; X64-NEXT: ret i32 0 +; +; X64-SSE41-LABEL: define i32 @length0( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-SSE41-NEXT: ret i32 0 +; +; X64-AVX1-LABEL: define i32 @length0( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1:[0-9]+]] { +; X64-AVX1-NEXT: ret i32 0 +; +; X64-AVX2-LABEL: define i32 @length0( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-AVX2-NEXT: ret i32 0 +; +; X64-AVX512BW-256-LABEL: define i32 @length0( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-AVX512BW-256-NEXT: ret i32 0 +; +; X64-AVX512BW-LABEL: define i32 @length0( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-AVX512BW-NEXT: ret i32 0 +; +; X64-AVX512F-256-LABEL: define i32 @length0( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-AVX512F-256-NEXT: ret i32 0 +; +; X64-AVX512F-LABEL: define i32 @length0( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-AVX512F-NEXT: ret i32 0 +; +; X64-MIC-AVX2-LABEL: define i32 @length0( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-MIC-AVX2-NEXT: ret i32 0 +; +; X64-MIC-AVX512F-LABEL: define i32 @length0( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-MIC-AVX512F-NEXT: ret i32 0 +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind + ret i32 %m + } + +define i1 @length0_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length0_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: ret i1 true +; +; X64-SSE41-LABEL: define i1 @length0_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: ret i1 true +; +; X64-AVX1-LABEL: define i1 @length0_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: ret i1 true +; +; X64-AVX2-LABEL: define i1 @length0_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: ret i1 true +; +; X64-AVX512BW-256-LABEL: define i1 @length0_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: ret i1 true +; +; X64-AVX512BW-LABEL: define i1 @length0_eq( 
+; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: ret i1 true +; +; X64-AVX512F-256-LABEL: define i1 @length0_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: ret i1 true +; +; X64-AVX512F-LABEL: define i1 @length0_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: ret i1 true +; +; X64-MIC-AVX2-LABEL: define i1 @length0_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: ret i1 true +; +; X64-MIC-AVX512F-LABEL: define i1 @length0_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: ret i1 true +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length0_lt(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length0_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: ret i1 false +; +; X64-SSE41-LABEL: define i1 @length0_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: ret i1 false +; +; X64-AVX1-LABEL: define i1 @length0_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: ret i1 false +; +; X64-AVX2-LABEL: define i1 @length0_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: ret i1 false +; +; X64-AVX512BW-256-LABEL: define i1 @length0_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: ret i1 false +; +; X64-AVX512BW-LABEL: define i1 @length0_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: ret i1 false +; +; X64-AVX512F-256-LABEL: define i1 @length0_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: ret i1 false +; +; X64-AVX512F-LABEL: define i1 @length0_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-AVX512F-NEXT: ret i1 false +; +; X64-MIC-AVX2-LABEL: define i1 @length0_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: ret i1 false +; +; X64-MIC-AVX512F-LABEL: define i1 @length0_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: ret i1 false +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length2(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length2( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-NEXT: ret i32 [[TMP7]] +; +; X64-SSE41-LABEL: define i32 @length2( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: ret i32 [[TMP7]] +; +; X64-AVX1-LABEL: define i32 @length2( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) 
+; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: ret i32 [[TMP7]] +; +; X64-AVX2-LABEL: define i32 @length2( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: ret i32 [[TMP7]] +; +; X64-AVX512BW-256-LABEL: define i32 @length2( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: ret i32 [[TMP7]] +; +; X64-AVX512BW-LABEL: define i32 @length2( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512BW-NEXT: 
[[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: ret i32 [[TMP7]] +; +; X64-AVX512F-256-LABEL: define i32 @length2( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: ret i32 [[TMP7]] +; +; X64-AVX512F-LABEL: define i32 @length2( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: ret i32 [[TMP7]] +; +; X64-MIC-AVX2-LABEL: define i32 @length2( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: ret i32 [[TMP7]] +; +; X64-MIC-AVX512F-LABEL: 
define i32 @length2( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: ret i32 [[TMP7]] +; + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + ret i32 %m +} + +define i32 @length2_const(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length2_const( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-NEXT: ret i32 [[TMP4]] +; +; X64-SSE41-LABEL: define i32 @length2_const( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-SSE41-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-SSE41-NEXT: ret i32 [[TMP4]] +; +; X64-AVX1-LABEL: define i32 @length2_const( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-AVX1-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-AVX1-NEXT: ret i32 [[TMP4]] +; +; X64-AVX2-LABEL: define i32 @length2_const( +; X64-AVX2-SAME: 
ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-AVX2-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-AVX2-NEXT: ret i32 [[TMP4]] +; +; X64-AVX512BW-256-LABEL: define i32 @length2_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-AVX512BW-256-NEXT: ret i32 [[TMP4]] +; +; X64-AVX512BW-LABEL: define i32 @length2_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-AVX512BW-NEXT: ret i32 [[TMP4]] +; +; X64-AVX512F-256-LABEL: define i32 @length2_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-AVX512F-256-NEXT: ret i32 [[TMP4]] +; +; X64-AVX512F-LABEL: define i32 @length2_const( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = sub 
i32 [[TMP3]], 12594 +; X64-AVX512F-NEXT: ret i32 [[TMP4]] +; +; X64-MIC-AVX2-LABEL: define i32 @length2_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-MIC-AVX2-NEXT: ret i32 [[TMP4]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length2_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-MIC-AVX512F-NEXT: ret i32 [[TMP4]] +; + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind + ret i32 %m +} + +define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length2_gt_const( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length2_gt_const( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-SSE41-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-SSE41-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; 
X64-AVX1-LABEL: define i1 @length2_gt_const( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-AVX1-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-AVX1-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length2_gt_const( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-AVX2-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-AVX2-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length2_gt_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length2_gt_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length2_gt_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length2_gt_const( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length2_gt_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length2_gt_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind + %c = icmp sgt 
i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length2_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length2_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length2_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length2_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length2_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; 
X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length2_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length2_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length2_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length2_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: 
[[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length2_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length2_lt(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length2_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length2_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], 
[[TMP6]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length2_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length2_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length2_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; 
X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length2_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length2_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length2_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] 
to i32 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length2_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length2_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_gt(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length2_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 
[[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length2_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length2_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length2_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX2-NEXT: 
[[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length2_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length2_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length2_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; 
X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length2_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length2_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length2_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; 
X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_const(ptr %X) nounwind { +; X64-LABEL: define i1 @length2_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-NEXT: ret i1 [[TMP2]] +; +; X64-SSE41-LABEL: define i1 @length2_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP2]] +; +; X64-AVX1-LABEL: define i1 @length2_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP2]] +; +; X64-AVX2-LABEL: define i1 @length2_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512BW-256-LABEL: define i1 @length2_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = 
load i16, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512BW-LABEL: define i1 @length2_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512F-256-LABEL: define i1 @length2_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512F-LABEL: define i1 @length2_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP2]] +; +; X64-MIC-AVX2-LABEL: define i1 @length2_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP2]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length2_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP2]] +; + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x 
i8], ptr @.str, i32 0, i32 1), i64 2) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-NEXT: ret i1 
[[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length3(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length3( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 
[[TMP3]]) +; X64-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-NEXT: br label [[ENDBLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length3( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[TMP13:%.*]] = 
zext i8 [[TMP11]] to i32 +; X64-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br label [[ENDBLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length3( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br label [[ENDBLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length3( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; 
X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br label [[ENDBLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length3( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], 
[[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length3( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], 
align 1 +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br label [[ENDBLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length3( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], 
[[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-LABEL: define i32 @length3( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br label [[ENDBLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define i32 @length3( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; 
X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length3( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], 
label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind + ret i32 %m +} + +define i1 @length3_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length3_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: ret i1 [[TMP12]] +; +; X64-SSE41-LABEL: define i1 @length3_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; 
X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-SSE41-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP12]] +; +; X64-AVX1-LABEL: define i1 @length3_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP12]] +; +; X64-AVX2-LABEL: define i1 @length3_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i16 
[[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512BW-256-LABEL: define i1 @length3_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512BW-LABEL: define i1 @length3_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; 
X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512F-256-LABEL: define i1 @length3_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512F-LABEL: define i1 @length3_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; 
X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP12]] +; +; X64-MIC-AVX2-LABEL: define i1 @length3_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP12]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length3_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], 
align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP12]] +; + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length4(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length4( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-NEXT: ret i32 [[TMP9]] +; +; X64-SSE41-LABEL: define i32 @length4( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = call i32 
@llvm.bswap.i32(i32 [[TMP1]]) +; X64-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-SSE41-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-SSE41-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-SSE41-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-SSE41-NEXT: ret i32 [[TMP9]] +; +; X64-AVX1-LABEL: define i32 @length4( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX1-NEXT: ret i32 [[TMP9]] +; +; X64-AVX2-LABEL: define i32 @length4( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX2-NEXT: ret i32 [[TMP9]] +; +; X64-AVX512BW-256-LABEL: define i32 @length4( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX512BW-256-NEXT: ret i32 [[TMP9]] +; +; X64-AVX512BW-LABEL: define i32 @length4( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX512BW-NEXT: ret i32 [[TMP9]] +; +; X64-AVX512F-256-LABEL: define i32 @length4( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], 
[[TMP4]] +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX512F-256-NEXT: ret i32 [[TMP9]] +; +; X64-AVX512F-LABEL: define i32 @length4( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX512F-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX512F-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX512F-NEXT: ret i32 [[TMP9]] +; +; X64-MIC-AVX2-LABEL: define i32 @length4( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-MIC-AVX2-NEXT: ret i32 [[TMP9]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length4( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], 
align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: ret i32 [[TMP9]] +; + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + ret i32 %m +} + +define i1 @length4_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length4_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: ret i1 [[TMP3]] +; +; X64-SSE41-LABEL: define i1 @length4_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP3]] +; +; X64-AVX1-LABEL: define i1 @length4_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP3]] +; +; X64-AVX2-LABEL: define i1 @length4_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; 
X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512BW-256-LABEL: define i1 @length4_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512BW-LABEL: define i1 @length4_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512F-256-LABEL: define i1 @length4_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512F-LABEL: define i1 @length4_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP3]] +; +; X64-MIC-AVX2-LABEL: define i1 @length4_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP3]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length4_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP3]] +; + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length4_lt(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length4_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-NEXT: ret i1 [[TMP5]] +; +; X64-SSE41-LABEL: define i1 @length4_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-SSE41-NEXT: ret i1 [[TMP5]] +; +; X64-AVX1-LABEL: define i1 @length4_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], 
align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: ret i1 [[TMP5]] +; +; X64-AVX2-LABEL: define i1 @length4_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: ret i1 [[TMP5]] +; +; X64-AVX512BW-256-LABEL: define i1 @length4_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX512BW-256-NEXT: ret i1 [[TMP5]] +; +; X64-AVX512BW-LABEL: define i1 @length4_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX512BW-NEXT: ret i1 [[TMP5]] +; +; X64-AVX512F-256-LABEL: define i1 @length4_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, 
ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX512F-256-NEXT: ret i1 [[TMP5]] +; +; X64-AVX512F-LABEL: define i1 @length4_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX512F-NEXT: ret i1 [[TMP5]] +; +; X64-MIC-AVX2-LABEL: define i1 @length4_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-MIC-AVX2-NEXT: ret i1 [[TMP5]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length4_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP5]] +; + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp slt i32 %m, 0 + 
ret i1 %c +} + +define i1 @length4_gt(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length4_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-NEXT: ret i1 [[TMP5]] +; +; X64-SSE41-LABEL: define i1 @length4_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-SSE41-NEXT: ret i1 [[TMP5]] +; +; X64-AVX1-LABEL: define i1 @length4_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: ret i1 [[TMP5]] +; +; X64-AVX2-LABEL: define i1 @length4_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: ret i1 [[TMP5]] +; +; X64-AVX512BW-256-LABEL: define i1 @length4_gt( +; 
X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX512BW-256-NEXT: ret i1 [[TMP5]] +; +; X64-AVX512BW-LABEL: define i1 @length4_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX512BW-NEXT: ret i1 [[TMP5]] +; +; X64-AVX512F-256-LABEL: define i1 @length4_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX512F-256-NEXT: ret i1 [[TMP5]] +; +; X64-AVX512F-LABEL: define i1 @length4_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX512F-NEXT: ret 
i1 [[TMP5]] +; +; X64-MIC-AVX2-LABEL: define i1 @length4_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-MIC-AVX2-NEXT: ret i1 [[TMP5]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length4_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP5]] +; + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length4_eq_const(ptr %X) nounwind { +; X64-LABEL: define i1 @length4_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length4_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length4_eq_const( +; X64-AVX1-SAME: ptr 
[[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length4_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length4_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length4_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length4_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length4_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; 
X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length4_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length4_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length5(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length5( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], 
label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-NEXT: br label [[ENDBLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length5( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br label [[ENDBLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: 
[[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length5( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br label [[ENDBLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length5( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], 
align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br label [[ENDBLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length5( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr 
i8, ptr [[X]], i64 4 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length5( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = sub i32 
[[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br label [[ENDBLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length5( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-LABEL: define i32 @length5( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br label [[ENDBLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define i32 @length5( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i32 
@llvm.bswap.i32(i32 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length5( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = 
getelementptr i8, ptr [[Y]], i64 4 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + ret i32 %m +} + +define i1 @length5_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length5_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: ret i1 [[TMP12]] +; +; X64-SSE41-LABEL: define i1 @length5_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr 
i8, ptr [[X]], i64 4 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-SSE41-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP12]] +; +; X64-AVX1-LABEL: define i1 @length5_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP12]] +; +; X64-AVX2-LABEL: define i1 @length5_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; 
X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512BW-256-LABEL: define i1 @length5_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512BW-LABEL: define i1 @length5_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512BW-NEXT: 
[[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512F-256-LABEL: define i1 @length5_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512F-LABEL: define i1 @length5_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = 
getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP12]] +; +; X64-MIC-AVX2-LABEL: define i1 @length5_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP12]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length5_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; 
X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP12]] +; + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length5_lt(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length5_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; 
X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-NEXT: br label [[ENDBLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length5_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br label [[ENDBLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length5_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; 
X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br label [[ENDBLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length5_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = 
icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br label [[ENDBLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length5_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; 
X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length5_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br label [[ENDBLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: 
[[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length5_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length5_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br label [[ENDBLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length5_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length5_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; 
X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length7(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length7( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-NEXT: 
[[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length7( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i32 
@llvm.bswap.i32(i32 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length7( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], 
[[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length7( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length7( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: 
[[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length7( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], 
[[LOADBB1:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length7( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-256-NEXT: 
[[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-LABEL: define i32 @length7( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 
-1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define i32 @length7( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length7( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = 
call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind + ret i32 %m +} + +define i1 @length7_lt(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length7_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = 
icmp eq i32 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length7_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; 
X64-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length7_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX1-NEXT: 
[[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length7_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; 
X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length7_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; 
X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length7_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; 
X64-AVX512BW-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length7_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[C:%.*]] 
= icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length7_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length7_lt( +; X64-MIC-AVX2-SAME: ptr 
[[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length7_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: 
res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i1 @length7_eq(ptr %X, ptr %Y) nounwind { 
+; X64-LABEL: define i1 @length7_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: ret i1 [[TMP10]] +; +; X64-SSE41-LABEL: define i1 @length7_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP10]] +; +; X64-AVX1-LABEL: define i1 @length7_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr 
i8, ptr [[Y]], i64 3 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP10]] +; +; X64-AVX2-LABEL: define i1 @length7_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512BW-256-LABEL: define i1 @length7_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i32 
[[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512BW-LABEL: define i1 @length7_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512F-256-LABEL: define i1 @length7_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: ret i1 
[[TMP10]] +; +; X64-AVX512F-LABEL: define i1 @length7_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP10]] +; +; X64-MIC-AVX2-LABEL: define i1 @length7_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP10]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length7_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP10]] +; + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length8(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length8( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-NEXT: ret i32 [[TMP9]] +; +; X64-SSE41-LABEL: define i32 @length8( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-SSE41-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-SSE41-NEXT: [[TMP6:%.*]] = 
icmp ult i64 [[TMP3]], [[TMP4]] +; X64-SSE41-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-SSE41-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-SSE41-NEXT: ret i32 [[TMP9]] +; +; X64-AVX1-LABEL: define i32 @length8( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX1-NEXT: ret i32 [[TMP9]] +; +; X64-AVX2-LABEL: define i32 @length8( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX2-NEXT: ret i32 [[TMP9]] +; +; X64-AVX512BW-256-LABEL: define i32 @length8( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 
[[TMP1]]) +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX512BW-256-NEXT: ret i32 [[TMP9]] +; +; X64-AVX512BW-LABEL: define i32 @length8( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX512BW-NEXT: ret i32 [[TMP9]] +; +; X64-AVX512F-256-LABEL: define i32 @length8( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; 
X64-AVX512F-256-NEXT: ret i32 [[TMP9]] +; +; X64-AVX512F-LABEL: define i32 @length8( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-AVX512F-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-AVX512F-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX512F-NEXT: ret i32 [[TMP9]] +; +; X64-MIC-AVX2-LABEL: define i32 @length8( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-MIC-AVX2-NEXT: ret i32 [[TMP9]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length8( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = 
icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: ret i32 [[TMP9]] +; + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind + ret i32 %m +} + +define i1 @length8_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length8_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length8_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length8_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length8_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length8_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length8_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length8_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length8_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: 
[[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length8_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length8_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length8_eq_const(ptr %X) nounwind { +; X64-LABEL: define i1 @length8_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-NEXT: ret i1 [[TMP2]] +; +; X64-SSE41-LABEL: define i1 @length8_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP2]] +; +; X64-AVX1-LABEL: define i1 @length8_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: 
[[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP2]] +; +; X64-AVX2-LABEL: define i1 @length8_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512BW-256-LABEL: define i1 @length8_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512BW-LABEL: define i1 @length8_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512F-256-LABEL: define i1 @length8_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512F-LABEL: define i1 @length8_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP2]] +; +; X64-MIC-AVX2-LABEL: define 
i1 @length8_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP2]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length8_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP2]] +; + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length9_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length9_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length9_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; 
X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-SSE41-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length9_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length9_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 
+; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length9_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length9_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length9_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-256-NEXT: 
[[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length9_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length9_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = 
icmp ne i64 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length9_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length10_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length10_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; 
X64-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length10_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-SSE41-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length10_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: 
[[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length10_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length10_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; 
X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length10_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length10_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: 
[[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length10_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length10_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 
+; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length10_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 
[[C]] +; + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length11_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length11_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length11_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length11_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { 
+; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length11_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length11_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; 
X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length11_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length11_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], 
i64 3 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length11_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length11_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-MIC-AVX2-NEXT: 
[[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length11_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length12_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length12_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP5:%.*]] = 
getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: ret i1 [[TMP12]] +; +; X64-SSE41-LABEL: define i1 @length12_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-SSE41-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP12]] +; +; X64-AVX1-LABEL: define i1 @length12_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP12]] +; +; X64-AVX2-LABEL: define i1 @length12_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512BW-256-LABEL: define i1 @length12_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 
+; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512BW-LABEL: define i1 @length12_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512F-256-LABEL: define i1 @length12_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], 
align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512F-LABEL: define i1 @length12_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP12]] +; +; X64-MIC-AVX2-LABEL: define i1 @length12_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP12]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length12_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP12]] +; + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length12(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length12( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] 
], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length12( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, 
i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-SSE41-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-SSE41-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-SSE41-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length12( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; 
X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-AVX1-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-AVX1-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-AVX1-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length12( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call 
i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-AVX2-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-AVX2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-AVX2-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length12( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; 
X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-AVX512BW-256-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length12( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 
@llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-AVX512BW-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-AVX512BW-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length12( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 
@llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-AVX512F-256-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-AVX512F-256-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-LABEL: define i32 @length12( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 
@llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-AVX512F-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-AVX512F-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define i32 @length12( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: 
[[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-MIC-AVX2-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length12( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; 
X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-MIC-AVX512F-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind + ret i32 %m +} + +define i1 @length13_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length13_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 
[[TMP10]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length13_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length13_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length13_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: 
[[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length13_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length13_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 
+; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length13_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length13_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; 
X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length13_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length13_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = 
getelementptr i8, ptr [[Y]], i64 5 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 13) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length14_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length14_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length14_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-SSE41-NEXT: [[TMP6:%.*]] = 
load i64, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length14_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length14_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: 
[[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length14_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length14_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512BW-NEXT: 
[[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length14_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length14_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 
[[TMP11]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length14_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length14_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + + 
+ + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 14) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length15(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length15( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length15( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], 
[[LOADBB1:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length15( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, 
i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length15( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: 
[[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length15( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label 
[[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length15( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr 
[[X]], i64 7 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length15( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX512F-256-NEXT: 
[[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-LABEL: define i32 @length15( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 
@llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define i32 @length15( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] 
+; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length15( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; 
X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind + ret i32 %m +} + +define i1 @length15_lt(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length15_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[C]] +; +; 
X64-SSE41-LABEL: define i1 @length15_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length15_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: 
[[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length15_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] 
= icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length15_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; 
X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length15_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; 
X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length15_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; 
X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length15_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; 
X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length15_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 
@llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length15_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = 
icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length15_const(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length15_const( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-NEXT: br i1 [[TMP5]], label [[LOADBB1]], 
label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length15_const( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-SSE41-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-SSE41-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-SSE41-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-SSE41-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length15_const( +; 
X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-AVX1-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-AVX1-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-AVX1-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-AVX1-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length15_const( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: 
[[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-AVX2-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-AVX2-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-AVX2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-AVX2-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length15_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-AVX512BW-256-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-AVX512BW-256-NEXT: 
[[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-AVX512BW-256-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length15_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-AVX512BW-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-AVX512BW-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-AVX512BW-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length15_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], 
ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-AVX512F-256-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-AVX512F-256-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-LABEL: define i32 @length15_const( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; 
X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-AVX512F-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-AVX512F-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-AVX512F-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-AVX512F-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define i32 @length15_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-MIC-AVX2-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = 
getelementptr i8, ptr [[X]], i64 7 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-MIC-AVX2-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length15_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-MIC-AVX512F-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-MIC-AVX512F-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] 
], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + + + + + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind + ret i32 %m +} + +define i1 @length15_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length15_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length15_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-SSE41-NEXT: ret 
i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length15_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length15_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length15_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] 
= load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length15_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length15_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: 
[[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length15_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length15_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr 
[[X]], i64 7 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length15_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length15_gt_const( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], 
[[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length15_gt_const( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-SSE41-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label 
[[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-SSE41-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-SSE41-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-SSE41-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length15_gt_const( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-AVX1-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-AVX1-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-AVX1-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-AVX1-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; 
X64-AVX1-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length15_gt_const( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-AVX2-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-AVX2-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-AVX2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-AVX2-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length15_gt_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, 
[[LOADBB1]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-AVX512BW-256-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-AVX512BW-256-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length15_gt_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4]] = call i64 
@llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-AVX512BW-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-AVX512BW-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-AVX512BW-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length15_gt_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-AVX512F-256-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-AVX512F-256-NEXT: 
[[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-AVX512F-256-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length15_gt_const( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-AVX512F-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-AVX512F-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-AVX512F-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-AVX512F-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; 
X64-MIC-AVX2-LABEL: define i1 @length15_gt_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, [[LOADBB1]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-MIC-AVX2-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-MIC-AVX2-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length15_gt_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4:%.*]], [[LOADBB]] ], [ [[TMP8:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 3544952156018063160, [[LOADBB]] ], [ 4051322327650219061, 
[[LOADBB1]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 3544952156018063160 +; X64-MIC-AVX512F-NEXT: br i1 [[TMP5]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]]) +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 4051322327650219061 +; X64-MIC-AVX512F-NEXT: br i1 [[TMP9]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 + +define i32 @length16(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length16( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 
-1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length16( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] 
= icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length16( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 
+; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length16( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; 
X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length16( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: 
br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length16( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 
[[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length16( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; 
X64-AVX512F-LABEL: define i32 @length16( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define i32 @length16( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; 
X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length16( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = 
phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind + ret i32 %m +} + +define i1 @length16_eq(ptr %x, ptr %y) nounwind { +; +; X64-LABEL: define i1 @length16_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i128 
[[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: ret i1 [[TMP3]] +; +; X64-SSE41-LABEL: define i1 @length16_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP3]] +; +; X64-AVX1-LABEL: define i1 @length16_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP3]] +; +; X64-AVX2-LABEL: define i1 @length16_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512BW-256-LABEL: define i1 @length16_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512BW-LABEL: define i1 @length16_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: 
[[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512F-256-LABEL: define i1 @length16_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512F-LABEL: define i1 @length16_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP3]] +; +; X64-MIC-AVX2-LABEL: define i1 @length16_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP3]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length16_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP3]] +; +; X64-AVX-LABEL: length16_eq: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; 
X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: setne %al +; X64-AVX-NEXT: retq +; X64-MIC-AVX-LABEL: length16_eq: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm1 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 +; X64-MIC-AVX-NEXT: kortestw %k0, %k0 +; X64-MIC-AVX-NEXT: setne %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length16_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label 
[[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length16_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 
[[CMP]] +; +; X64-AVX1-LABEL: define i1 @length16_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length16_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi 
i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length16_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] 
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length16_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; 
X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length16_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; 
X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length16_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 
[[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length16_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: 
[[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length16_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: 
[[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length16_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; 
X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length16_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 
[[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length16_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: 
[[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length16_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 
@llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length16_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; 
X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length16_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; 
X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length16_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = 
icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length16_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; 
X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length16_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], 
[[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length16_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; 
X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_eq_const(ptr %X) nounwind { +; +; X64-LABEL: define i1 @length16_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length16_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length16_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length16_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length16_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) 
#[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length16_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length16_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length16_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length16_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX2-NEXT: 
[[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length16_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; +; X64-AVX-LABEL: length16_eq_const: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq +; X64-MIC-AVX-LABEL: length16_eq_const: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 +; X64-MIC-AVX-NEXT: kortestw %k0, %k0 +; X64-MIC-AVX-NEXT: sete %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 + +define i32 @length24(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length24( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load 
i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length24( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; 
X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length24( +; X64-AVX1-SAME: 
ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 
[[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length24( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = 
getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length24( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: 
[[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length24( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label 
[[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length24( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br 
label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; 
X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-LABEL: define i32 @length24( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] 
= call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define i32 @length24( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] 
+; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length24( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-MIC-AVX512F-NEXT: 
[[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = 
phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind + ret i32 %m +} + +define i1 @length24_eq(ptr %x, ptr %y) nounwind { +; +; X64-LABEL: define i1 @length24_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length24_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; 
X64-SSE41-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length24_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length24_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-AVX2-NEXT: [[TMP10:%.*]] = 
xor i128 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length24_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length24_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: 
[[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length24_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length24_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: 
[[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length24_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length24_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], 
align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-AVX-LABEL: length24_eq: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq +; X64-MIC-AVX-LABEL: length24_eq: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm1 +; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm2, %k0 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: sete %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_lt(ptr %x, ptr %y) 
nounwind { +; X64-LABEL: define i1 @length24_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-NEXT: br i1 
[[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length24_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; 
X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length24_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; 
X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length24_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; 
X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length24_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], 
[[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: 
br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length24_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label 
[[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length24_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 
[[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length24_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], 
[[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], 
[[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length24_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = 
getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length24_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; 
X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length24_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], 
[[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length24_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; 
X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length24_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: 
loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length24_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; 
X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length24_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], 
align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length24_gt( +; X64-AVX512BW-SAME: ptr 
[[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: 
[[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length24_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 
+; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length24_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], 
align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length24_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ 
[[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] 
+; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length24_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = 
icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_eq_const(ptr %X) nounwind { +; +; X64-LABEL: define i1 @length24_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-NEXT: ret i1 [[TMP8]] +; +; X64-SSE41-LABEL: define i1 @length24_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { 
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-SSE41-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-SSE41-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-SSE41-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-SSE41-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP8]] +; +; X64-AVX1-LABEL: define i1 @length24_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-AVX1-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-AVX1-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-AVX1-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP8]] +; +; X64-AVX2-LABEL: define i1 @length24_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-AVX2-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-AVX2-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 
+; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP8]] +; +; X64-AVX512BW-256-LABEL: define i1 @length24_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP8]] +; +; X64-AVX512BW-LABEL: define i1 @length24_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP8]] +; +; X64-AVX512F-256-LABEL: define i1 @length24_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512F-256-NEXT: 
[[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP8]] +; +; X64-AVX512F-LABEL: define i1 @length24_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-AVX512F-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP8]] +; +; X64-MIC-AVX2-LABEL: define i1 @length24_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to 
i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP8]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length24_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP8]] +; +; X64-AVX-LABEL: length24_eq_const: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: setne %al +; X64-AVX-NEXT: retq +; X64-MIC-AVX-LABEL: length24_eq_const: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [959985462,858927408,0,0] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: setne %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length31(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 
@length31( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], 
[[TMP20]] +; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64: loadbb3: +; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length31( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; 
X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-SSE41: loadbb3: +; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length31( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; 
X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = 
icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX1: loadbb3: +; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length31( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; 
X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX2: loadbb3: +; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length31( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: 
[[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb3: +; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length31( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label 
[[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb3: +; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], 
align 1 +; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length31( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: 
[[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb3: +; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-LABEL: define i32 @length31( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; 
X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: 
[[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb3: +; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define i32 @length31( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 
[[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb3: +; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; 
X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length31( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], 
label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb3: +; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 31) nounwind + ret i32 %m +} + +define i1 @length31_eq(ptr %x, ptr %y) nounwind { +; +; X64-LABEL: define i1 @length31_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; 
X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length31_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length31_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr 
[[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length31_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length31_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i128 
[[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length31_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length31_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: 
[[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length31_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length31_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to 
i32 +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length31_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-AVX-LABEL: length31_eq: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq +; X64-MIC-AVX-LABEL: length31_eq: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2 +; X64-MIC-AVX-NEXT: vmovdqu 15(%rsi), %xmm3 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: sete %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind + %cmp = icmp eq i32 
%call, 0 + ret i1 %cmp +} + +define i1 @length31_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length31_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; 
X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64: loadbb3: +; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length31_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; 
X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-SSE41: loadbb3: +; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 
[[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length31_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: 
[[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX1: loadbb3: +; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length31_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = 
call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX2: loadbb3: +; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; 
X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length31_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], 
[[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb3: +; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length31_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], 
[[LOADBB3:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; 
X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb3: +; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length31_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 
[[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb3: +; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] 
+; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length31_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 
[[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb3: +; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length31_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], 
[[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb3: +; X64-MIC-AVX2-NEXT: 
[[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length31_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label 
[[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb3: +; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; 
X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length31_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 
[[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64: loadbb3: +; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length31_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br 
label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-SSE41: loadbb3: +; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; 
X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length31_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; 
X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX1: loadbb3: +; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length31_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ 
[[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX2: loadbb3: +; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr 
[[TMP22]], align 1 +; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length31_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, 
ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb3: +; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; 
X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length31_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr 
[[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb3: +; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length31_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], 
i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb3: +; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; 
X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length31_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] 
= getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb3: +; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length31_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr 
[[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb3: +; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length31_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load 
i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb3: +; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], 
align 1 +; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { +; +; X64-LABEL: define i1 @length31_eq_prefer128( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length31_eq_prefer128( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], 
[[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length31_eq_prefer128( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length31_eq_prefer128( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX2-NEXT: 
[[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length31_eq_prefer128( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length31_eq_prefer128( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, 
ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length31_eq_prefer128( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length31_eq_prefer128( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 
+; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length31_eq_prefer128( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length31_eq_prefer128( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = 
load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-AVX-LABEL: length31_eq_prefer128: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq +; X64-MIC-AVX-LABEL: length31_eq_prefer128: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2 +; X64-MIC-AVX-NEXT: vmovdqu 15(%rsi), %xmm3 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: sete %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_eq_const(ptr %X) nounwind { +; +; X64-LABEL: define i1 @length31_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to 
i32 +; X64-NEXT: ret i1 [[TMP7]] +; +; X64-SSE41-LABEL: define i1 @length31_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP7]] +; +; X64-AVX1-LABEL: define i1 @length31_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-AVX1-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP7]] +; +; X64-AVX2-LABEL: define i1 @length31_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-AVX2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp ne 
i128 [[TMP6]], 0 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP7]] +; +; X64-AVX512BW-256-LABEL: define i1 @length31_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP7]] +; +; X64-AVX512BW-LABEL: define i1 @length31_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP7]] +; +; X64-AVX512F-256-LABEL: define i1 @length31_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512F-256-NEXT: 
[[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP7]] +; +; X64-AVX512F-LABEL: define i1 @length31_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP7]] +; +; X64-MIC-AVX2-LABEL: define i1 @length31_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP7]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length31_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, 
ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP7]] +; +; X64-AVX-LABEL: length31_eq_const: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: setne %al +; X64-AVX-NEXT: retq +; X64-MIC-AVX-LABEL: length31_eq_const: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [943142453,842084409,909456435,809056311] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: setne %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 31) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length32(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length32( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], 
[[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64: loadbb3: +; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-NEXT: 
[[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length32( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 
@llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-SSE41: loadbb3: +; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length32( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] 
], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX1: loadbb3: +; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr 
[[Y]], i64 24 +; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length32( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 
1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX2: loadbb3: +; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length32( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; 
X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; 
X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb3: +; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length32( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 
@llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb3: +; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label 
[[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length32( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: 
br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb3: +; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-LABEL: define i32 @length32( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ 
[[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb3: +; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; 
X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define i32 @length32( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = 
getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb3: +; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length32( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr 
[[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb3: +; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind + ret i32 %m +} + +; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 + +define i1 @length32_eq(ptr %x, ptr %y) nounwind { +; +; X64-LABEL: define i1 @length32_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, 
ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length32_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length32_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length32_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: 
[[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length32_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length32_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length32_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length32_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; 
X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length32_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length32_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-AVX512-LABEL: length32_eq: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: sete %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq +; X64-MIC-AVX-LABEL: length32_eq: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 +; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm1 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 +; X64-MIC-AVX-NEXT: kortestw %k0, %k0 +; X64-MIC-AVX-NEXT: sete %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length32_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: 
[[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64: loadbb3: +; X64-NEXT: [[TMP22:%.*]] = 
getelementptr i8, ptr [[X]], i64 24 +; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length32_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; 
X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-SSE41: loadbb3: +; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length32_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: 
[[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 
[[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX1: loadbb3: +; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length32_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label 
[[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX2: loadbb3: +; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 
@length32_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: 
[[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb3: +; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length32_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult 
i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb3: +; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; 
X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length32_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr 
[[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb3: +; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp 
slt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length32_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr 
[[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb3: +; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length32_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label 
[[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb3: +; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], 
align 1 +; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length32_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load 
i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb3: +; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length32_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] 
= load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64: loadbb3: +; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length32_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 
@llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-SSE41: loadbb3: +; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, 
[[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length32_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 
16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX1: loadbb3: +; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length32_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; 
X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX2: loadbb3: +; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; 
X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length32_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 
@llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb3: +; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length32_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ 
[[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; 
X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb3: +; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length32_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; 
X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb3: +; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; 
X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length32_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 
@llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb3: +; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length32_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; 
X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], 
label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb3: +; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length32_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: 
[[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb3: +; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX512F-NEXT: br i1 
[[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { +; +; X64-LABEL: define i1 @length32_eq_prefer128( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length32_eq_prefer128( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 
+; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length32_eq_prefer128( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length32_eq_prefer128( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; 
X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length32_eq_prefer128( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length32_eq_prefer128( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512BW-NEXT: 
[[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length32_eq_prefer128( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length32_eq_prefer128( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 
+; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length32_eq_prefer128( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length32_eq_prefer128( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] 
= icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-AVX-LABEL: length32_eq_prefer128: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovdqu 16(%rdi), %xmm1 +; X64-AVX-NEXT: vpxor 16(%rsi), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq +; X64-MIC-AVX-LABEL: length32_eq_prefer128: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-MIC-AVX-NEXT: vmovdqu 16(%rdi), %xmm1 +; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2 +; X64-MIC-AVX-NEXT: vmovdqu 16(%rsi), %xmm3 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: sete %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_const(ptr %X) nounwind { +; +; X64-LABEL: define i1 @length32_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-NEXT: ret i1 [[TMP7]] +; +; X64-SSE41-LABEL: define i1 @length32_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr 
i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP7]] +; +; X64-AVX1-LABEL: define i1 @length32_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP2]] +; +; X64-AVX2-LABEL: define i1 @length32_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512BW-256-LABEL: define i1 @length32_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512BW-LABEL: define i1 @length32_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-NEXT: ret 
i1 [[TMP2]] +; +; X64-AVX512F-256-LABEL: define i1 @length32_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512F-LABEL: define i1 @length32_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP2]] +; +; X64-MIC-AVX2-LABEL: define i1 @length32_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP2]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length32_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512-LABEL: length32_eq_const: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: setne %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq +; 
X64-MIC-AVX-LABEL: length32_eq_const: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 +; X64-MIC-AVX-NEXT: kortestw %k0, %k0 +; X64-MIC-AVX-NEXT: setne %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length48(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length48( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length48( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length48( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length48( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length48( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length48( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-AVX512BW-NEXT: ret 
i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length48( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length48( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length48( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length48( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 48) nounwind + ret i32 %m +} + +define i1 @length48_eq(ptr %x, ptr %y) nounwind { +; X64-SSE-LABEL: length48_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $48, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length48_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 
[[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length48_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-SSE41-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-SSE41-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length48_eq( 
+; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length48_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; 
X64-AVX512BW-256-LABEL: define i1 @length48_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]] +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]] +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length48_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]] +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], 
[[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length48_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]] +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]] +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length48_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = 
zext i128 [[TMP6]] to i256 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]] +; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length48_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]] +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]] +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length48_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], 
i64 32 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]] +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-AVX512-LABEL: length48_eq: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1 +; X64-AVX512-NEXT: vmovdqu 32(%rsi), %xmm2 +; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: sete %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq +; X64-MIC-AVX-LABEL: length48_eq: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 +; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm1 +; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm2 +; X64-MIC-AVX-NEXT: vmovdqu 32(%rsi), %xmm3 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm2, %k0 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: sete %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length48_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 
[[CMP]] +; +; X64-SSE41-LABEL: define i1 @length48_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length48_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length48_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length48_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length48_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length48_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length48_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: 
[[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length48_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length48_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length48_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length48_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length48_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length48_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call 
i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length48_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length48_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length48_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length48_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length48_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length48_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] 
= icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { +; X64-LABEL: define i1 @length48_eq_prefer128( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length48_eq_prefer128( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 
+; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-SSE41-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-SSE41-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length48_eq_prefer128( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-AVX1-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-AVX1-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-AVX1-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-AVX1-NEXT: [[TMP17:%.*]] 
= zext i1 [[TMP16]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length48_eq_prefer128( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-AVX2-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-AVX2-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-AVX2-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-AVX2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length48_eq_prefer128( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; 
X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length48_eq_prefer128( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; 
X64-AVX512BW-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length48_eq_prefer128( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length48_eq_prefer128( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; 
X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-AVX512F-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-AVX512F-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length48_eq_prefer128( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], 
align 1 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length48_eq_prefer128( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + + + + + + + + + + + + + + 
+ + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_eq_const(ptr %X) nounwind { +; X64-SSE-LABEL: length48_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $48, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length48_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690 +; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-NEXT: [[TMP10:%.*]] = or i128 [[TMP9]], [[TMP8]] +; X64-NEXT: [[TMP11:%.*]] = icmp ne i128 [[TMP10]], 0 +; X64-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; X64-NEXT: ret i1 [[TMP11]] +; +; X64-SSE41-LABEL: define i1 @length48_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; 
X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690 +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = or i128 [[TMP9]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP11:%.*]] = icmp ne i128 [[TMP10]], 0 +; X64-SSE41-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP11]] +; +; X64-AVX1-LABEL: define i1 @length48_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256 +; X64-AVX1-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690 +; X64-AVX1-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]] +; X64-AVX1-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP8]] +; +; X64-AVX2-LABEL: define i1 @length48_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256 +; X64-AVX2-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690 +; X64-AVX2-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]] +; X64-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP8]] +; +; X64-AVX512BW-256-LABEL: define i1 
@length48_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]] +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP8]] +; +; X64-AVX512BW-LABEL: define i1 @length48_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]] +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP8]] +; +; X64-AVX512F-256-LABEL: define i1 @length48_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; 
X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]] +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP8]] +; +; X64-AVX512F-LABEL: define i1 @length48_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]] +; X64-AVX512F-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP8]] +; +; X64-MIC-AVX2-LABEL: define i1 @length48_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690 +; 
X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]] +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP8]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length48_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP8]] +; +; X64-AVX512-LABEL: length48_eq_const: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1 +; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: setne %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq +; X64-MIC-AVX-LABEL: length48_eq_const: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 +; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm1 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [892613426,959985462,858927408,926299444,0,0,0,0] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960] +; 
X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: setne %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 48) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length63(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length63( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length63( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length63( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length63( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length63( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length63( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length63( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; 
+; X64-AVX512F-LABEL: define i32 @length63( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length63( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length63( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 63) nounwind + ret i32 %m +} + +define i1 @length63_eq(ptr %x, ptr %y) nounwind { +; X64-SSE-LABEL: length63_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $63, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length63_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-NEXT: [[TMP13:%.*]] = 
xor i128 [[TMP11]], [[TMP12]] +; X64-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 47 +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 47 +; X64-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1 +; X64-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]] +; X64-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]] +; X64-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]] +; X64-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0 +; X64-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-NEXT: ret i1 [[TMP22]] +; +; X64-SSE41-LABEL: define i1 @length63_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 47 +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 47 +; X64-SSE41-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]] +; X64-SSE41-NEXT: 
[[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]] +; X64-SSE41-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0 +; X64-SSE41-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP22]] +; +; X64-AVX1-LABEL: define i1 @length63_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP10]] +; +; X64-AVX2-LABEL: define i1 @length63_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: ret i1 
[[TMP10]] +; +; X64-AVX512BW-256-LABEL: define i1 @length63_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512BW-LABEL: define i1 @length63_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512F-256-LABEL: define i1 @length63_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: 
[[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512F-LABEL: define i1 @length63_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP10]] +; +; X64-MIC-AVX2-LABEL: define i1 @length63_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; 
X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP10]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length63_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512-LABEL: length63_eq: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1 +; X64-AVX512-NEXT: vpxor 31(%rsi), %ymm1, %ymm1 +; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: setne %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq +; X64-MIC-AVX-LABEL: length63_eq: +; X64-MIC-AVX: # %bb.0: +; 
X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 +; X64-MIC-AVX-NEXT: vmovdqu 31(%rdi), %ymm1 +; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm2 +; X64-MIC-AVX-NEXT: vmovdqu 31(%rsi), %ymm3 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: setne %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length63_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length63_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length63_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length63_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length63_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 
[[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length63_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length63_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length63_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length63_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length63_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length63_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) 
#[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length63_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length63_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length63_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length63_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length63_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length63_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length63_gt( 
+; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length63_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length63_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_eq_const(ptr %X) nounwind { +; X64-SSE-LABEL: length63_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $63, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length63_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 
73389002901949112059321871464991568690 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 47 +; X64-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 66716800424378146251538984255488604215 +; X64-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]] +; X64-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]] +; X64-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0 +; X64-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length63_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 47 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 66716800424378146251538984255488604215 +; X64-SSE41-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-SSE41-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]] +; X64-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0 +; X64-SSE41-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 
@length63_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649 +; X64-AVX1-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length63_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649 +; X64-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length63_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31 
+; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length63_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length63_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649 +; 
X64-AVX512F-256-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length63_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length63_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-MIC-AVX2-NEXT: ret 
i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length63_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; +; X64-AVX512-LABEL: length63_eq_const: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1 +; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: sete %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq +; X64-MIC-AVX-LABEL: length63_eq_const: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 +; X64-MIC-AVX-NEXT: vmovdqu 31(%rdi), %ymm1 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [875770417,943142453,842084409,909456435,809056311,875770417,943142453,842084409] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: sete %al +; X64-MIC-AVX-NEXT: vzeroupper +; 
X64-MIC-AVX-NEXT: retq + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 63) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length64(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length64( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length64( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length64( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length64( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length64( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length64( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length64( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length64( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call 
i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length64( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length64( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind + ret i32 %m +} + +define i1 @length64_eq(ptr %x, ptr %y) nounwind { +; X64-SSE-LABEL: length64_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $64, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length64_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], 
i64 48 +; X64-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1 +; X64-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]] +; X64-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]] +; X64-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]] +; X64-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0 +; X64-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-NEXT: ret i1 [[TMP22]] +; +; X64-SSE41-LABEL: define i1 @length64_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 48 +; X64-SSE41-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]] +; X64-SSE41-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]] +; X64-SSE41-NEXT: [[TMP21:%.*]] = or i128 
[[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0 +; X64-SSE41-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP22]] +; +; X64-AVX1-LABEL: define i1 @length64_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP10]] +; +; X64-AVX2-LABEL: define i1 @length64_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512BW-256-LABEL: define i1 @length64_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512BW-LABEL: define i1 @length64_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512F-256-LABEL: define i1 @length64_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], 
[[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512F-LABEL: define i1 @length64_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP3]] +; +; X64-MIC-AVX2-LABEL: define i1 @length64_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP10]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length64_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512-LABEL: length64_eq: +; X64-AVX512: # %bb.0: +; 
X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512-NEXT: vpcmpneqd (%rsi), %zmm0, %k0 +; X64-AVX512-NEXT: kortestw %k0, %k0 +; X64-AVX512-NEXT: setne %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length64_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length64_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length64_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length64_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length64_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length64_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: 
[[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length64_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length64_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length64_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length64_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length64_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length64_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length64_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length64_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length64_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length64_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length64_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length64_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512F-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length64_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length64_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_eq_const(ptr %X) nounwind { +; X64-SSE-LABEL: length64_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $64, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length64_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; X64-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP11:%.*]] = 
xor i128 [[TMP10]], 68051240286688436651889234231545575736 +; X64-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]] +; X64-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]] +; X64-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0 +; X64-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length64_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 68051240286688436651889234231545575736 +; X64-SSE41-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-SSE41-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]] +; X64-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0 +; X64-SSE41-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length64_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 
22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX1-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length64_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length64_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 
23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length64_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length64_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length64_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512F-NEXT: 
[[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length64_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length64_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; +; X64-AVX512-LABEL: length64_eq_const: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0 +; X64-AVX512-NEXT: kortestw %k0, %k0 +; X64-AVX512-NEXT: sete %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length96(ptr %X, ptr %Y) nounwind { +; 
X64-LABEL: define i32 @length96( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length96( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length96( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length96( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length96( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length96( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length96( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length96( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length96( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length96( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 96) nounwind + ret i32 %m +} + +define i1 @length96_eq(ptr %x, ptr %y) nounwind { +; X64-SSE-LABEL: length96_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $96, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length96_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length96_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length96_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], 
[[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX1-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX1-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]] +; X64-AVX1-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0 +; X64-AVX1-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP16]] +; +; X64-AVX2-LABEL: define i1 @length96_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]] +; X64-AVX2-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0 +; X64-AVX2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP16]] +; +; X64-AVX512BW-256-LABEL: define i1 @length96_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]] +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP16]] +; +; X64-AVX512BW-LABEL: define i1 @length96_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i256 [[TMP6]] to i512 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i256 [[TMP7]] to 
i512 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i512 [[TMP8]], [[TMP9]] +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i512 [[TMP3]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i512 [[TMP11]], 0 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512F-256-LABEL: define i1 @length96_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]] +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP16]] +; +; X64-AVX512F-LABEL: define i1 @length96_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; 
X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i256 [[TMP6]] to i512 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i256 [[TMP7]] to i512 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i512 [[TMP8]], [[TMP9]] +; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i512 [[TMP3]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i512 [[TMP11]], 0 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP12]] +; +; X64-MIC-AVX2-LABEL: define i1 @length96_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]] +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; 
X64-MIC-AVX2-NEXT: ret i1 [[TMP16]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length96_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i256 [[TMP6]] to i512 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i256 [[TMP7]] to i512 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i512 [[TMP8]], [[TMP9]] +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i512 [[TMP3]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i512 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP12]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length96_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length96_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length96_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 
@memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length96_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length96_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length96_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length96_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length96_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length96_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; 
X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length96_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length96_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length96_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length96_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length96_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length96_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; 
X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length96_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length96_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length96_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length96_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length96_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_eq_const(ptr %X) nounwind { +; X64-SSE-LABEL: length96_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $96, %edx +; X64-SSE-NEXT: callq memcmp +; 
X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length96_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 96) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length96_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 96) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length96_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0 +; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length96_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 
22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0 +; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length96_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]] +; X64-AVX512BW-256-NEXT: 
[[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length96_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = zext i256 [[TMP5]] to i512 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP6]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length96_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 
24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length96_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = zext i256 [[TMP5]] to i512 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP6]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length96_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, 
ptr [[X]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length96_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = zext i256 [[TMP5]] to i512 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP6]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 96) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length127(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length127( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length127( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length127( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length127( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length127( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length127( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length127( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length127( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length127( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length127( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { 
+; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 127) nounwind + ret i32 %m +} + +define i1 @length127_eq(ptr %x, ptr %y) nounwind { +; X64-SSE-LABEL: length127_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $127, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length127_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length127_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length127_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = load i256, ptr 
[[TMP10]], align 1 +; X64-AVX1-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95 +; X64-AVX1-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-AVX1-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-AVX1-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-AVX1-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP22]] +; +; X64-AVX2-LABEL: define i1 @length127_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95 +; X64-AVX2-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], 
align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-AVX2-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-AVX2-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-AVX2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP22]] +; +; X64-AVX512BW-256-LABEL: define i1 @length127_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-AVX512BW-256-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP20:%.*]] = or i256 
[[TMP13]], [[TMP18]] +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP22]] +; +; X64-AVX512BW-LABEL: define i1 @length127_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 63 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512F-256-LABEL: define i1 @length127_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr 
[[Y]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-AVX512F-256-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP22]] +; +; X64-AVX512F-LABEL: define i1 @length127_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 63 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP10]] +; +; X64-MIC-AVX2-LABEL: define i1 @length127_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-MIC-AVX2-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP22]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length127_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = 
getelementptr i8, ptr [[X]], i64 63 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 63 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP10]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length127_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length127_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length127_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length127_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length127_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length127_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length127_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length127_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length127_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length127_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_gt(ptr %x, 
ptr %y) nounwind { +; X64-LABEL: define i1 @length127_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length127_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length127_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length127_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length127_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length127_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length127_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call 
i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length127_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length127_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length127_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_eq_const(ptr %X) nounwind { +; X64-SSE-LABEL: length127_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $127, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length127_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 127) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length127_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr 
[[X]], ptr @.str, i64 127) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length127_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677 +; X64-AVX1-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX1-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-AVX1-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-AVX1-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length127_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP4:%.*]] = 
load i256, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677 +; X64-AVX2-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX2-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-AVX2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length127_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; 
X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length127_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 63), align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length127_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = 
getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length127_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 63), align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; 
X64-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length127_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length127_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-MIC-AVX512F-NEXT: 
[[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 63), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 127) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length128(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length128( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length128( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length128( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length128( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length128( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; 
X64-AVX512BW-LABEL: define i32 @length128( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length128( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length128( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length128( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length128( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 128) nounwind + ret i32 %m +} + +define i1 @length128_eq(ptr %x, ptr %y) nounwind { +; X64-SSE-LABEL: length128_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $128, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length128_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length128_eq( +; X64-SSE41-SAME: ptr 
[[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length128_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX1-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96 +; X64-AVX1-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-AVX1-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-AVX1-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-AVX1-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP22]] +; +; X64-AVX2-LABEL: define i1 @length128_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: 
[[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96 +; X64-AVX2-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-AVX2-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-AVX2-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-AVX2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP22]] +; +; X64-AVX512BW-256-LABEL: define i1 @length128_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr 
[[Y]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-AVX512BW-256-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP22]] +; +; X64-AVX512BW-LABEL: define i1 @length128_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; 
X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512F-256-LABEL: define i1 @length128_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-AVX512F-256-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = icmp ne i256 
[[TMP21]], 0 +; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP22]] +; +; X64-AVX512F-LABEL: define i1 @length128_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP10]] +; +; X64-MIC-AVX2-LABEL: define i1 @length128_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = xor 
i256 [[TMP11]], [[TMP12]] +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-MIC-AVX2-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP22]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length128_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP10]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length128_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 
@memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length128_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length128_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length128_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length128_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length128_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length128_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] 
+; +; X64-AVX512F-LABEL: define i1 @length128_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length128_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length128_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length128_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length128_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length128_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] 
+; +; X64-AVX2-LABEL: define i1 @length128_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length128_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length128_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length128_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length128_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length128_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length128_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_eq_const(ptr %X) nounwind { +; X64-SSE-LABEL: length128_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $128, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length128_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 128) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length128_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 128) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length128_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 
24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934 +; X64-AVX1-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX1-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-AVX1-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-AVX1-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length128_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934 +; X64-AVX2-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX2-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-AVX2-NEXT: [[TMP14:%.*]] = or i256 
[[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-AVX2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length128_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length128_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: 
[[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length128_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = or i256 
[[TMP2]], [[TMP5]] +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length128_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length128_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 
+; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length128_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 128) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length192(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length192( +; X64-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length192( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length192( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length192( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length192( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length192( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length192( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length192( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length192( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: 
[[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length192( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 192) nounwind + ret i32 %m +} + +define i1 @length192_eq(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length192_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length192_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length192_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length192_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length192_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 
[[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length192_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = or i512 [[TMP14]], [[TMP13]] +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = icmp ne i512 [[TMP15]], 0 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP16]] +; +; X64-AVX512F-256-LABEL: define i1 @length192_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length192_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: 
[[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-AVX512F-NEXT: [[TMP14:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP15:%.*]] = or i512 [[TMP14]], [[TMP13]] +; X64-AVX512F-NEXT: [[TMP16:%.*]] = icmp ne i512 [[TMP15]], 0 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP16]] +; +; X64-MIC-AVX2-LABEL: define i1 @length192_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length192_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; 
X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = or i512 [[TMP14]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = icmp ne i512 [[TMP15]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP16]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length192_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length192_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length192_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length192_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; 
X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length192_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length192_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length192_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length192_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length192_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length192_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call 
i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length192_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length192_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length192_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length192_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length192_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length192_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 
@length192_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length192_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length192_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length192_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_eq_const(ptr %X) nounwind { +; X64-LABEL: define i1 @length192_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length192_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 
@length192_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length192_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length192_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length192_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = or i512 [[TMP12]], [[TMP11]] +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp ne i512 [[TMP13]], 0 +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = zext i1 
[[TMP14]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length192_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length192_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP13:%.*]] = or i512 [[TMP12]], [[TMP11]] +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp ne i512 [[TMP13]], 0 +; X64-AVX512F-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length192_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; 
X64-MIC-AVX512F-LABEL: define i1 @length192_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = or i512 [[TMP12]], [[TMP11]] +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp ne i512 [[TMP13]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 192) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length255(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length255( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length255( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length255( +; 
X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length255( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length255( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length255( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length255( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length255( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length255( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length255( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 
255) nounwind + ret i32 %m +} + +define i1 @length255_eq(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length255_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length255_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length255_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length255_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length255_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length255_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = 
getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 191 +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 191 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]] +; X64-AVX512BW-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]] +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0 +; X64-AVX512BW-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP22]] +; +; X64-AVX512F-256-LABEL: define i1 @length255_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length255_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: 
[[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-AVX512F-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 191 +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 191 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]] +; X64-AVX512F-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]] +; X64-AVX512F-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0 +; X64-AVX512F-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP22]] +; +; X64-MIC-AVX2-LABEL: define i1 @length255_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length255_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: 
[[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 191 +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 191 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]] +; X64-MIC-AVX512F-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]] +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP22]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length255_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; 
X64-SSE41-LABEL: define i1 @length255_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length255_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length255_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length255_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length255_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length255_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length255_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: 
[[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length255_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length255_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length255_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length255_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length255_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length255_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: 
[[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length255_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length255_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length255_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length255_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length255_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length255_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] 
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_eq_const(ptr %X) nounwind { +; X64-LABEL: define i1 @length255_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length255_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length255_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length255_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length255_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length255_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; 
X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 191 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1 +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 191), align 1 +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]] +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]] +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]] +; X64-AVX512BW-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0 +; X64-AVX512BW-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length255_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length255_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = 
getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 191 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1 +; X64-AVX512F-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 191), align 1 +; X64-AVX512F-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]] +; X64-AVX512F-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]] +; X64-AVX512F-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]] +; X64-AVX512F-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0 +; X64-AVX512F-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length255_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length255_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; 
X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 191 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 191), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]] +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]] +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]] +; X64-MIC-AVX512F-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 255) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length256(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length256( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length256( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length256( +; 
X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length256( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length256( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length256( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length256( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length256( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length256( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length256( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 
256) nounwind + ret i32 %m +} + +define i1 @length256_eq(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length256_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length256_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length256_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length256_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length256_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length256_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = 
getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 192 +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 192 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]] +; X64-AVX512BW-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]] +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0 +; X64-AVX512BW-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP22]] +; +; X64-AVX512F-256-LABEL: define i1 @length256_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length256_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: 
[[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-AVX512F-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 192 +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 192 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]] +; X64-AVX512F-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]] +; X64-AVX512F-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0 +; X64-AVX512F-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP22]] +; +; X64-MIC-AVX2-LABEL: define i1 @length256_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length256_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: 
[[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 192 +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 192 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]] +; X64-MIC-AVX512F-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]] +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP22]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length256_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; 
X64-SSE41-LABEL: define i1 @length256_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length256_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length256_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length256_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length256_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length256_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length256_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: 
[[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length256_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length256_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length256_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length256_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length256_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length256_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: 
[[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length256_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length256_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length256_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length256_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length256_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length256_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] 
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_eq_const(ptr %X) nounwind { +; X64-LABEL: define i1 @length256_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length256_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length256_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length256_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length256_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length256_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; 
X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 192 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1 +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 192), align 1 +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]] +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]] +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]] +; X64-AVX512BW-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0 +; X64-AVX512BW-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length256_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length256_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = 
getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 192 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1 +; X64-AVX512F-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 192), align 1 +; X64-AVX512F-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]] +; X64-AVX512F-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]] +; X64-AVX512F-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]] +; X64-AVX512F-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0 +; X64-AVX512F-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length256_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length256_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; 
X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 192 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 192), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]] +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]] +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]] +; X64-MIC-AVX512F-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 256) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length384(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length384( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length384( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length384( +; 
X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length384( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length384( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length384( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length384( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length384( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length384( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length384( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 
384) nounwind + ret i32 %m +} + +define i1 @length384_eq(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length384_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length384_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length384_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length384_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length384_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length384_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length384_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length384_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length384_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length384_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length384_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length384_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length384_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], 
ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length384_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length384_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length384_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length384_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length384_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length384_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], 
i64 384) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length384_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length384_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length384_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length384_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length384_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length384_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 
384) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length384_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length384_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length384_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length384_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length384_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_eq_const(ptr %X) nounwind { +; X64-LABEL: define i1 @length384_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: 
[[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length384_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length384_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length384_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length384_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length384_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length384_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length384_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) 
#[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length384_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length384_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 384) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length511(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length511( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length511( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length511( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length511( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length511( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length511( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length511( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length511( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length511( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length511( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 511) nounwind + ret i32 %m +} + +define i1 @length511_eq(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length511_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length511_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr 
[[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length511_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length511_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length511_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length511_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length511_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length511_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 
[[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length511_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length511_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length511_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length511_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length511_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length511_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; 
X64-AVX512BW-256-LABEL: define i1 @length511_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length511_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length511_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length511_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length511_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length511_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) 
nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length511_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length511_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length511_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length511_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length511_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length511_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length511_gt( +; X64-AVX512F-256-SAME: 
ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length511_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length511_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length511_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_eq_const(ptr %X) nounwind { +; X64-LABEL: define i1 @length511_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length511_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length511_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) 
#[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length511_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length511_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length511_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length511_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length511_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length511_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: 
define i1 @length511_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 511) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length512(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length512( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length512( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length512( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length512( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length512( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length512( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length512( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length512( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length512( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length512( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 512) nounwind + ret i32 %m +} + +define i1 @length512_eq(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length512_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length512_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length512_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length512_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { 
+; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length512_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length512_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length512_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length512_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length512_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length512_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 
512) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length512_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length512_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length512_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length512_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length512_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length512_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) 
#[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length512_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length512_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length512_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length512_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length512_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length512_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 
@memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length512_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length512_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length512_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length512_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length512_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length512_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; 
X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length512_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length512_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_eq_const(ptr %X) nounwind { +; X64-LABEL: define i1 @length512_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length512_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length512_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length512_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length512_eq_const( +; 
X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length512_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length512_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length512_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length512_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length512_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 512) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; This checks that we do not do stupid things with huge sizes. 
+define i32 @huge_length(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @huge_length( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @huge_length( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @huge_length( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @huge_length( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @huge_length( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @huge_length( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @huge_length( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @huge_length( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: 
[[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @huge_length( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @huge_length( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind + ret i32 %m +} + +define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @huge_length_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @huge_length_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @huge_length_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @huge_length_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; 
X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @huge_length_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @huge_length_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @huge_length_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @huge_length_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @huge_length_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @huge_length_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 
[[C]] +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; This checks non-constant sizes. +define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) nounwind { +; X64-LABEL: define i32 @nonconst_length( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @nonconst_length( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @nonconst_length( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @nonconst_length( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @nonconst_length( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @nonconst_length( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @nonconst_length( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; 
X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @nonconst_length( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @nonconst_length( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @nonconst_length( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind + ret i32 %m +} + +define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) nounwind { +; X64-LABEL: define i1 @nonconst_length_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @nonconst_length_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @nonconst_length_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX1-NEXT: 
[[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @nonconst_length_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @nonconst_length_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @nonconst_length_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @nonconst_length_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @nonconst_length_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @nonconst_length_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; 
X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @nonconst_length_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} diff --git a/llvm/test/CodeGen/X86/memcmp-constant.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-constant.ll similarity index 50% rename from llvm/test/CodeGen/X86/memcmp-constant.ll rename to llvm/test/Transforms/ExpandMemCmp/X86/memcmp-constant.ll index 2059b8f804082..908c6b34183e5 100644 --- a/llvm/test/CodeGen/X86/memcmp-constant.ll +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-constant.ll @@ -1,5 +1,7 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_1LD +; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_2LD + @.str1 = private constant [4 x i8] c"\00\00\00\00", align 1 @.str2 = private constant [4 x i8] c"\ff\ff\ff\ff", align 1 @@ -7,49 +9,49 @@ declare i32 @memcmp(ptr, ptr, i64) define i32 @length4_same() nounwind { -; CHECK-LABEL: length4_same: -; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: retq +; X64-LABEL: define i32 @length4_same( +; X64-SAME: ) #[[ATTR0:[0-9]+]] { +; X64-NEXT: ret i32 0 +; %m = tail call i32
@memcmp(ptr @.str1, ptr @.str1, i64 4) nounwind ret i32 %m } define i1 @length4_same_lt() nounwind { -; CHECK-LABEL: length4_same_lt: -; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: retq +; X64-LABEL: define i1 @length4_same_lt( +; X64-SAME: ) #[[ATTR0]] { +; X64-NEXT: ret i1 false +; %m = tail call i32 @memcmp(ptr @.str1, ptr @.str1, i64 4) nounwind %c = icmp slt i32 %m, 0 ret i1 %c } define i1 @length4_same_gt() nounwind { -; CHECK-LABEL: length4_same_gt: -; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: retq +; X64-LABEL: define i1 @length4_same_gt( +; X64-SAME: ) #[[ATTR0]] { +; X64-NEXT: ret i1 false +; %m = tail call i32 @memcmp(ptr @.str1, ptr @.str1, i64 4) nounwind %c = icmp sgt i32 %m, 0 ret i1 %c } define i1 @length4_same_le() nounwind { -; CHECK-LABEL: length4_same_le: -; CHECK: # %bb.0: -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: retq +; X64-LABEL: define i1 @length4_same_le( +; X64-SAME: ) #[[ATTR0]] { +; X64-NEXT: ret i1 true +; %m = tail call i32 @memcmp(ptr @.str1, ptr @.str1, i64 4) nounwind %c = icmp sle i32 %m, 0 ret i1 %c } define i1 @length4_same_ge() nounwind { -; CHECK-LABEL: length4_same_ge: -; CHECK: # %bb.0: -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: retq +; X64-LABEL: define i1 @length4_same_ge( +; X64-SAME: ) #[[ATTR0]] { +; X64-NEXT: ret i1 true +; %m = tail call i32 @memcmp(ptr @.str1, ptr @.str1, i64 4) nounwind %c = icmp sge i32 %m, 0 ret i1 %c @@ -57,52 +59,55 @@ define i1 @length4_same_ge() nounwind { define i32 @length4() nounwind { -; CHECK-LABEL: length4: -; CHECK: # %bb.0: -; CHECK-NEXT: movl $-1, %eax -; CHECK-NEXT: retq +; X64-LABEL: define i32 @length4( +; X64-SAME: ) #[[ATTR0]] { +; X64-NEXT: ret i32 -1 +; %m = tail call i32 @memcmp(ptr @.str1, ptr @.str2, i64 4) nounwind ret i32 %m } define i1 @length4_lt() nounwind { -; CHECK-LABEL: length4_lt: -; CHECK: # %bb.0: -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: retq +; X64-LABEL: define i1 @length4_lt( +; X64-SAME: ) #[[ATTR0]] { +; X64-NEXT: ret 
i1 true +; %m = tail call i32 @memcmp(ptr @.str1, ptr @.str2, i64 4) nounwind %c = icmp slt i32 %m, 0 ret i1 %c } define i1 @length4_gt() nounwind { -; CHECK-LABEL: length4_gt: -; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: retq +; X64-LABEL: define i1 @length4_gt( +; X64-SAME: ) #[[ATTR0]] { +; X64-NEXT: ret i1 false +; %m = tail call i32 @memcmp(ptr @.str1, ptr @.str2, i64 4) nounwind %c = icmp sgt i32 %m, 0 ret i1 %c } define i1 @length4_le() nounwind { -; CHECK-LABEL: length4_le: -; CHECK: # %bb.0: -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: retq +; X64-LABEL: define i1 @length4_le( +; X64-SAME: ) #[[ATTR0]] { +; X64-NEXT: ret i1 true +; %m = tail call i32 @memcmp(ptr @.str1, ptr @.str2, i64 4) nounwind %c = icmp sle i32 %m, 0 ret i1 %c } define i1 @length4_ge() nounwind { -; CHECK-LABEL: length4_ge: -; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: retq +; X64-LABEL: define i1 @length4_ge( +; X64-SAME: ) #[[ATTR0]] { +; X64-NEXT: ret i1 false +; %m = tail call i32 @memcmp(ptr @.str1, ptr @.str2, i64 4) nounwind %c = icmp sge i32 %m, 0 ret i1 %c } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; X64_1LD: {{.*}} +; X64_2LD: {{.*}} diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-minsize-x32.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-minsize-x32.ll new file mode 100644 index 0000000000000..edd70ddb445dc --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-minsize-x32.ll @@ -0,0 +1,493 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=cmov < %s | FileCheck %s --check-prefix=X86 +; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=X86-SSE2 + +; This tests codegen time inlining/optimization of memcmp +; rdar://6480398 + +@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1 + +declare dso_local i32 @memcmp(ptr, ptr, i32) + +define i32 @length2(ptr %X, ptr %Y) nounwind minsize { +; X86-LABEL: define i32 @length2( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR2:[0-9]+]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length2( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR2:[0-9]+]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind + ret i32 %m +} + +define i1 @length2_eq(ptr %X, ptr %Y) nounwind minsize { +; X86-LABEL: define i1 @length2_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR2]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length2_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail 
call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR2]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_const(ptr %X) nounwind minsize { +; X86-LABEL: define i1 @length2_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) #[[ATTR2]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length2_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) #[[ATTR2]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind minsize { +; X86-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR3:[0-9]+]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR3:[0-9]+]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length3(ptr %X, ptr %Y) nounwind minsize { +; X86-LABEL: define i32 @length3( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { 
+; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 3) #[[ATTR2]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length3( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 3) #[[ATTR2]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind + ret i32 %m +} + +define i1 @length3_eq(ptr %X, ptr %Y) nounwind minsize { +; X86-LABEL: define i1 @length3_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 3) #[[ATTR2]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length3_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 3) #[[ATTR2]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length4(ptr %X, ptr %Y) nounwind minsize { +; X86-LABEL: define i32 @length4( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 4) #[[ATTR2]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length4( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 4) #[[ATTR2]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind + ret i32 %m +} + +define i1 @length4_eq(ptr %X, ptr %Y) nounwind minsize { +; X86-LABEL: define i1 @length4_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 4) #[[ATTR2]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 
[[C]] +; +; X86-SSE2-LABEL: define i1 @length4_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 4) #[[ATTR2]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length4_eq_const(ptr %X) nounwind minsize { +; X86-LABEL: define i1 @length4_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 4) #[[ATTR2]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length4_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 4) #[[ATTR2]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length5(ptr %X, ptr %Y) nounwind minsize { +; X86-LABEL: define i32 @length5( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 5) #[[ATTR2]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length5( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 5) #[[ATTR2]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind + ret i32 %m +} + +define i1 @length5_eq(ptr %X, ptr %Y) nounwind minsize { +; X86-LABEL: define i1 @length5_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 
@memcmp(ptr [[X]], ptr [[Y]], i32 5) #[[ATTR2]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length5_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 5) #[[ATTR2]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length8(ptr %X, ptr %Y) nounwind minsize { +; X86-LABEL: define i32 @length8( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 8) #[[ATTR2]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length8( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 8) #[[ATTR2]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind + ret i32 %m +} + +define i1 @length8_eq(ptr %X, ptr %Y) nounwind minsize { +; X86-LABEL: define i1 @length8_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 8) #[[ATTR2]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length8_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 8) #[[ATTR2]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length8_eq_const(ptr %X) nounwind minsize { +; X86-LABEL: define i1 @length8_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 8) #[[ATTR2]] 
+; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length8_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 8) #[[ATTR2]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length12_eq(ptr %X, ptr %Y) nounwind minsize { +; X86-LABEL: define i1 @length12_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR2]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length12_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR2]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length12(ptr %X, ptr %Y) nounwind minsize { +; X86-LABEL: define i32 @length12( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR2]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length12( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR2]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind + ret i32 %m +} + +; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 + +define i32 @length16(ptr %X, ptr %Y) nounwind minsize { +; X86-LABEL: define i32 @length16( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], 
ptr [[Y]], i32 16) #[[ATTR2]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length16( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR2]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind + ret i32 %m +} + +define i1 @length16_eq(ptr %x, ptr %y) nounwind minsize { +; +; X86-LABEL: define i1 @length16_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR2]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length16_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR2]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_eq_const(ptr %X) nounwind minsize { +; +; X86-LABEL: define i1 @length16_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 16) #[[ATTR2]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length16_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 16) #[[ATTR2]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 + +define i32 @length24(ptr %X, ptr %Y) nounwind minsize { +; X86-LABEL: define i32 @length24( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] 
{ +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR2]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length24( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR2]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind + ret i32 %m +} + +define i1 @length24_eq(ptr %x, ptr %y) nounwind minsize { +; X86-LABEL: define i1 @length24_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR2]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length24_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR2]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_eq_const(ptr %X) nounwind minsize { +; X86-LABEL: define i1 @length24_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 24) #[[ATTR2]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length24_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 24) #[[ATTR2]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length32(ptr %X, ptr %Y) nounwind minsize { +; X86-LABEL: define i32 @length32( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR2]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length32( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR2]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind + ret i32 %m +} + +; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 + +define i1 @length32_eq(ptr %x, ptr %y) nounwind minsize { +; X86-LABEL: define i1 @length32_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR2]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length32_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR2]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_const(ptr %X) nounwind minsize { +; X86-LABEL: define i1 @length32_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 32) #[[ATTR2]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length32_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 32) #[[ATTR2]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length64(ptr %X, ptr %Y) nounwind minsize { +; X86-LABEL: define i32 @length64( +; 
X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR2]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length64( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR2]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind + ret i32 %m +} + +define i1 @length64_eq(ptr %x, ptr %y) nounwind minsize { +; X86-LABEL: define i1 @length64_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR2]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length64_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR2]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_eq_const(ptr %X) nounwind minsize { +; X86-LABEL: define i1 @length64_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR2]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length64_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR2]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-minsize.ll 
b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-minsize.ll new file mode 100644 index 0000000000000..431dc15896299 --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-minsize.ll @@ -0,0 +1,707 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=X64 +; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefix=X64-AVX1 +; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx2 < %s | FileCheck %s --check-prefix=X64-AVX2 + +; This tests codegen time inlining/optimization of memcmp +; rdar://6480398 + +@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1 + +declare dso_local i32 @memcmp(ptr, ptr, i64) + +define i32 @length2(ptr %X, ptr %Y) nounwind minsize { +; X64-LABEL: define i32 @length2( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0:[0-9]+]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR1:[0-9]+]] +; X64-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length2( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR2:[0-9]+]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length2( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR2:[0-9]+]] +; X64-AVX2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + ret i32 %m +} + +define i1 @length2_eq(ptr %X, ptr %Y) nounwind minsize { +; X64-LABEL: define i1 @length2_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR1]] +; 
X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length2_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR2]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length2_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR2]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_const(ptr %X) nounwind minsize { +; X64-LABEL: define i1 @length2_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) #[[ATTR1]] +; X64-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length2_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) #[[ATTR2]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length2_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) #[[ATTR2]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind minsize { +; X64-LABEL: define i1 
@length2_eq_nobuiltin_attr( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR2:[0-9]+]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length3(ptr %X, ptr %Y) nounwind minsize { +; X64-LABEL: define i32 @length3( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 3) #[[ATTR1]] +; X64-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length3( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 3) #[[ATTR2]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length3( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 3) #[[ATTR2]] +; X64-AVX2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind + ret i32 %m +} + +define i1 @length3_eq(ptr %X, ptr %Y) nounwind minsize { +; X64-LABEL: define i1 @length3_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 3) 
#[[ATTR1]] +; X64-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length3_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 3) #[[ATTR2]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length3_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 3) #[[ATTR2]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length4(ptr %X, ptr %Y) nounwind minsize { +; X64-LABEL: define i32 @length4( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 4) #[[ATTR1]] +; X64-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length4( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 4) #[[ATTR2]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length4( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 4) #[[ATTR2]] +; X64-AVX2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + ret i32 %m +} + +define i1 @length4_eq(ptr %X, ptr %Y) nounwind minsize { +; X64-LABEL: define i1 @length4_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 4) #[[ATTR1]] +; X64-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length4_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail 
call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 4) #[[ATTR2]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length4_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 4) #[[ATTR2]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length4_eq_const(ptr %X) nounwind minsize { +; X64-LABEL: define i1 @length4_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) #[[ATTR1]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length4_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) #[[ATTR2]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length4_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) #[[ATTR2]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length5(ptr %X, ptr %Y) nounwind minsize { +; X64-LABEL: define i32 @length5( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 5) #[[ATTR1]] +; X64-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length5( +; 
X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 5) #[[ATTR2]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length5( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 5) #[[ATTR2]] +; X64-AVX2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + ret i32 %m +} + +define i1 @length5_eq(ptr %X, ptr %Y) nounwind minsize { +; X64-LABEL: define i1 @length5_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 5) #[[ATTR1]] +; X64-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length5_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 5) #[[ATTR2]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length5_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 5) #[[ATTR2]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length8(ptr %X, ptr %Y) nounwind minsize { +; X64-LABEL: define i32 @length8( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 8) #[[ATTR1]] +; X64-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length8( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 8) #[[ATTR2]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length8( 
+; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 8) #[[ATTR2]] +; X64-AVX2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind + ret i32 %m +} + +define i1 @length8_eq(ptr %X, ptr %Y) nounwind minsize { +; X64-LABEL: define i1 @length8_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 8) #[[ATTR1]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length8_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 8) #[[ATTR2]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length8_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 8) #[[ATTR2]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length8_eq_const(ptr %X) nounwind minsize { +; X64-LABEL: define i1 @length8_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 8) #[[ATTR1]] +; X64-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length8_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 8) #[[ATTR2]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length8_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 8) 
#[[ATTR2]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length12_eq(ptr %X, ptr %Y) nounwind minsize { +; X64-LABEL: define i1 @length12_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 12) #[[ATTR1]] +; X64-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length12_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 12) #[[ATTR2]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length12_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 12) #[[ATTR2]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length12(ptr %X, ptr %Y) nounwind minsize { +; X64-LABEL: define i32 @length12( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 12) #[[ATTR1]] +; X64-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length12( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 12) #[[ATTR2]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length12( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 12) #[[ATTR2]] +; X64-AVX2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind + ret i32 %m +} + +; 
PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 + +define i32 @length16(ptr %X, ptr %Y) nounwind minsize { +; +; X64-LABEL: define i32 @length16( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 16) #[[ATTR1]] +; X64-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length16( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 16) #[[ATTR2]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length16( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 16) #[[ATTR2]] +; X64-AVX2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind + ret i32 %m +} + +define i1 @length16_eq(ptr %x, ptr %y) nounwind minsize { +; X64-SSE2-LABEL: length16_eq: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: movdqu (%rsi), %xmm0 +; X64-SSE2-NEXT: movdqu (%rdi), %xmm1 +; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 +; X64-SSE2-NEXT: pmovmskb %xmm1, %eax +; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X64-SSE2-NEXT: setne %al +; X64-SSE2-NEXT: retq +; +; X64-AVX-LABEL: length16_eq: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: setne %al +; X64-AVX-NEXT: retq +; X64-LABEL: define i1 @length16_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 16) #[[ATTR1]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length16_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 16) #[[ATTR2]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; 
X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length16_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 16) #[[ATTR2]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_eq_const(ptr %X) nounwind minsize { +; X64-SSE2-LABEL: length16_eq_const: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE2-NEXT: pmovmskb %xmm0, %eax +; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X64-SSE2-NEXT: sete %al +; X64-SSE2-NEXT: retq +; +; X64-AVX-LABEL: length16_eq_const: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq +; X64-LABEL: define i1 @length16_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 16) #[[ATTR1]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length16_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 16) #[[ATTR2]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length16_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 16) #[[ATTR2]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; PR33914 - 
https://bugs.llvm.org/show_bug.cgi?id=33914 + +define i32 @length24(ptr %X, ptr %Y) nounwind minsize { +; X64-LABEL: define i32 @length24( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR1]] +; X64-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length24( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR2]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length24( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR2]] +; X64-AVX2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind + ret i32 %m +} + +define i1 @length24_eq(ptr %x, ptr %y) nounwind minsize { +; X64-LABEL: define i1 @length24_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR1]] +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length24_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR2]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length24_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR2]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_eq_const(ptr %X) nounwind minsize { +; X64-LABEL: define i1 @length24_eq_const( +; X64-SAME: ptr 
[[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 24) #[[ATTR1]] +; X64-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length24_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 24) #[[ATTR2]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length24_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 24) #[[ATTR2]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length32(ptr %X, ptr %Y) nounwind minsize { +; X64-LABEL: define i32 @length32( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR1]] +; X64-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length32( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR2]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length32( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR2]] +; X64-AVX2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind + ret i32 %m +} + +; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 + +define i1 @length32_eq(ptr %x, ptr %y) nounwind minsize { +; X64-SSE2-LABEL: length32_eq: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: pushq %rax +; X64-SSE2-NEXT: pushq $32 +; X64-SSE2-NEXT: popq %rdx +; X64-SSE2-NEXT: callq memcmp +; X64-SSE2-NEXT: testl %eax, %eax +; X64-SSE2-NEXT: sete %al +; 
X64-SSE2-NEXT: popq %rcx +; X64-SSE2-NEXT: retq +; +; X64-LABEL: define i1 @length32_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR1]] +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length32_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR2]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length32_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR2]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_const(ptr %X) nounwind minsize { +; X64-SSE2-LABEL: length32_eq_const: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: pushq %rax +; X64-SSE2-NEXT: pushq $32 +; X64-SSE2-NEXT: popq %rdx +; X64-SSE2-NEXT: movl $.L.str, %esi +; X64-SSE2-NEXT: callq memcmp +; X64-SSE2-NEXT: testl %eax, %eax +; X64-SSE2-NEXT: setne %al +; X64-SSE2-NEXT: popq %rcx +; X64-SSE2-NEXT: retq +; +; X64-LABEL: define i1 @length32_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 32) #[[ATTR1]] +; X64-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length32_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 32) #[[ATTR2]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length32_eq_const( +; X64-AVX2-SAME: ptr 
[[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 32) #[[ATTR2]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length64(ptr %X, ptr %Y) nounwind minsize { +; X64-LABEL: define i32 @length64( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR1]] +; X64-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length64( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR2]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length64( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR2]] +; X64-AVX2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind + ret i32 %m +} + +define i1 @length64_eq(ptr %x, ptr %y) nounwind minsize { +; X64-LABEL: define i1 @length64_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR1]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length64_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR2]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length64_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR2]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; 
X64-AVX2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_eq_const(ptr %X) nounwind minsize { +; X64-LABEL: define i1 @length64_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 64) #[[ATTR1]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length64_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 64) #[[ATTR2]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length64_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 64) #[[ATTR2]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-more-load-pairs-x32.ll new file mode 100644 index 0000000000000..abdadb14086c2 --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-more-load-pairs-x32.ll @@ -0,0 +1,6203 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; NOTE: This is a copy of llvm/test/CodeGen/X86/memcmp.ll with more load pairs. Please keep it that way. 
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=i686-unknown-unknown -mattr=cmov < %s | FileCheck %s --check-prefixes=X86 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=i686-unknown-unknown -mattr=+sse < %s | FileCheck %s --check-prefixes=X86-SSE1 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=i686-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefixes=X86-SSE2 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=i686-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=X86-SSE41 + +; This tests codegen time inlining/optimization of memcmp +; rdar://6480398 + +@.str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1 + +declare dso_local i32 @memcmp(ptr, ptr, i32) + +define i32 @length0(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length0( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X86-NEXT: ret i32 0 +; +; X86-SSE1-LABEL: define i32 @length0( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X86-SSE1-NEXT: ret i32 0 +; +; X86-SSE2-LABEL: define i32 @length0( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X86-SSE2-NEXT: ret i32 0 +; +; X86-SSE41-LABEL: define i32 @length0( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X86-SSE41-NEXT: ret i32 
0 +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind + ret i32 %m + } + +define i1 @length0_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length0_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: ret i1 true +; +; X86-SSE1-LABEL: define i1 @length0_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: ret i1 true +; +; X86-SSE2-LABEL: define i1 @length0_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: ret i1 true +; +; X86-SSE41-LABEL: define i1 @length0_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: ret i1 true +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length0_lt(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length0_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: ret i1 false +; +; X86-SSE1-LABEL: define i1 @length0_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: ret i1 false +; +; X86-SSE2-LABEL: define i1 @length0_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: ret i1 false +; +; X86-SSE41-LABEL: define i1 @length0_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: ret i1 false +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length2(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length2( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], 
[[TMP6]] +; X86-NEXT: ret i32 [[TMP7]] +; +; X86-SSE1-LABEL: define i32 @length2( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: ret i32 [[TMP7]] +; +; X86-SSE2-LABEL: define i32 @length2( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: ret i32 [[TMP7]] +; +; X86-SSE41-LABEL: define i32 @length2( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: ret i32 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind + ret i32 %m +} + +define i1 @length2_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length2_eq( +; X86-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length2_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length2_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length2_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length2_lt(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length2_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], 
align 1 +; X86-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length2_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length2_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length2_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE41-NEXT: [[TMP4:%.*]] = call 
i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_gt(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length2_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length2_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length2_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE2-NEXT: 
[[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length2_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length2_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X86-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-NEXT: ret i1 [[TMP2]] +; +; X86-SSE1-LABEL: define i1 @length2_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X86-SSE1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE1-NEXT: ret i1 [[TMP2]] +; +; X86-SSE2-LABEL: define i1 @length2_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; 
X86-SSE2-NEXT: ret i1 [[TMP2]] +; +; X86-SSE41-LABEL: define i1 @length2_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X86-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP2]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR4:[0-9]+]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR4:[0-9]+]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR4:[0-9]+]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR4:[0-9]+]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length3(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length3( +; X86-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br label [[ENDBLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE1-LABEL: define i32 @length3( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: br label [[LOADBB:%.*]] +; X86-SSE1: res_block: +; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE1: loadbb: +; X86-SSE1-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X86-SSE1-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: br i1 [[TMP7]], label 
[[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE1: loadbb1: +; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-SSE1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE1-NEXT: br label [[ENDBLOCK]] +; X86-SSE1: endblock: +; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE1-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE2-LABEL: define i32 @length3( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br label [[ENDBLOCK]] 
+; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE41-LABEL: define i32 @length3( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: br label [[LOADBB:%.*]] +; X86-SSE41: res_block: +; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE41: loadbb: +; X86-SSE41-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X86-SSE41-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE41: loadbb1: +; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: br label [[ENDBLOCK]] +; X86-SSE41: endblock: +; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE41-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind + ret i32 %m +} + +define i1 @length3_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length3_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: 
[[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X86-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X86-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X86-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X86-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X86-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-NEXT: ret i1 [[TMP12]] +; +; X86-SSE1-LABEL: define i1 @length3_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-SSE1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-SSE1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-SSE1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X86-SSE1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X86-SSE1-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X86-SSE1-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X86-SSE1-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-SSE1-NEXT: ret i1 [[TMP12]] +; +; X86-SSE2-LABEL: define i1 @length3_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr 
[[Y]], i64 2 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X86-SSE2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X86-SSE2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X86-SSE2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X86-SSE2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP12]] +; +; X86-SSE41-LABEL: define i1 @length3_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X86-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X86-SSE41-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X86-SSE41-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X86-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length4(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length4( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-NEXT: 
[[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X86-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X86-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X86-NEXT: ret i32 [[TMP9]] +; +; X86-SSE1-LABEL: define i32 @length4( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-SSE1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-SSE1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X86-SSE1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X86-SSE1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X86-SSE1-NEXT: ret i32 [[TMP9]] +; +; X86-SSE2-LABEL: define i32 @length4( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-SSE2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X86-SSE2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X86-SSE2-NEXT: ret i32 [[TMP9]] +; +; X86-SSE41-LABEL: define i32 @length4( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = call i32 
@llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-SSE41-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-SSE41-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X86-SSE41-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X86-SSE41-NEXT: ret i32 [[TMP9]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind + ret i32 %m +} + +define i1 @length4_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length4_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-NEXT: ret i1 [[TMP3]] +; +; X86-SSE1-LABEL: define i1 @length4_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE1-NEXT: ret i1 [[TMP3]] +; +; X86-SSE2-LABEL: define i1 @length4_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP3]] +; +; X86-SSE41-LABEL: define i1 @length4_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; 
X86-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP3]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length4_lt(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length4_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-NEXT: ret i1 [[TMP5]] +; +; X86-SSE1-LABEL: define i1 @length4_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE1-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-SSE1-NEXT: ret i1 [[TMP5]] +; +; X86-SSE2-LABEL: define i1 @length4_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE2-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-SSE2-NEXT: ret i1 [[TMP5]] +; +; X86-SSE41-LABEL: define i1 @length4_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 
[[TMP2]]) +; X86-SSE41-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-SSE41-NEXT: ret i1 [[TMP5]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i1 @length4_gt(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length4_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-NEXT: ret i1 [[TMP5]] +; +; X86-SSE1-LABEL: define i1 @length4_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-SSE1-NEXT: ret i1 [[TMP5]] +; +; X86-SSE2-LABEL: define i1 @length4_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-SSE2-NEXT: ret i1 [[TMP5]] +; +; X86-SSE41-LABEL: define i1 @length4_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE41-NEXT: [[TMP4:%.*]] = call i32 
@llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-SSE41-NEXT: ret i1 [[TMP5]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length4_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length4_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X86-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length4_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X86-SSE1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length4_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length4_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X86-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length5(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length5( +; X86-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br label [[ENDBLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE1-LABEL: define i32 @length5( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: br label [[LOADBB:%.*]] +; X86-SSE1: res_block: +; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE1: loadbb: +; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: br i1 [[TMP7]], label 
[[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE1: loadbb1: +; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE1-NEXT: br label [[ENDBLOCK]] +; X86-SSE1: endblock: +; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE1-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE2-LABEL: define i32 @length5( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br label [[ENDBLOCK]] 
+; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE41-LABEL: define i32 @length5( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: br label [[LOADBB:%.*]] +; X86-SSE41: res_block: +; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE41: loadbb: +; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE41: loadbb1: +; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: br label [[ENDBLOCK]] +; X86-SSE41: endblock: +; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE41-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind + ret i32 %m +} + +define i1 @length5_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length5_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: 
[[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X86-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X86-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X86-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X86-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X86-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-NEXT: ret i1 [[TMP12]] +; +; X86-SSE1-LABEL: define i1 @length5_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-SSE1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-SSE1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X86-SSE1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X86-SSE1-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X86-SSE1-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X86-SSE1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-SSE1-NEXT: ret i1 [[TMP12]] +; +; X86-SSE2-LABEL: define i1 @length5_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr 
[[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X86-SSE2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X86-SSE2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X86-SSE2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X86-SSE2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP12]] +; +; X86-SSE41-LABEL: define i1 @length5_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X86-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X86-SSE41-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X86-SSE41-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X86-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length5_lt(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length5_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br label [[ENDBLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length5_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: br label [[LOADBB:%.*]] +; X86-SSE1: res_block: +; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE1: loadbb: +; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE1: loadbb1: +; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP10:%.*]] = load 
i8, ptr [[TMP8]], align 1 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE1-NEXT: br label [[ENDBLOCK]] +; X86-SSE1: endblock: +; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length5_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br label [[ENDBLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; 
X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length5_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: br label [[LOADBB:%.*]] +; X86-SSE41: res_block: +; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE41: loadbb: +; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE41: loadbb1: +; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: br label [[ENDBLOCK]] +; X86-SSE41: endblock: +; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length7(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length7( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-NEXT: [[PHI_SRC2:%.*]] = 
phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE1-LABEL: define i32 @length7( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: br label [[LOADBB:%.*]] +; X86-SSE1: res_block: +; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE1: loadbb: +; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], 
align 1 +; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE1: loadbb1: +; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE1: endblock: +; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE1-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE2-LABEL: define i32 @length7( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label 
[[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE41-LABEL: define i32 @length7( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: br label [[LOADBB:%.*]] +; X86-SSE41: res_block: +; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE41: loadbb: +; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE41: loadbb1: +; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE41-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE41: endblock: +; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE41-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind + ret i32 %m +} + +define i1 @length7_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length7_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-NEXT: ret i1 [[TMP10]] +; +; X86-SSE1-LABEL: define i1 @length7_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; 
X86-SSE1-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-SSE1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE1-NEXT: ret i1 [[TMP10]] +; +; X86-SSE2-LABEL: define i1 @length7_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP10]] +; +; X86-SSE41-LABEL: define i1 @length7_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP10]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 
@length7_lt(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length7_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length7_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: br label [[LOADBB:%.*]] +; X86-SSE1: res_block: +; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ 
[[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE1: loadbb: +; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE1: loadbb1: +; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE1: endblock: +; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length7_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label 
[[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length7_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: br label [[LOADBB:%.*]] +; X86-SSE41: res_block: +; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE41: loadbb: +; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP5]] = call i32 
@llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE41: loadbb1: +; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE41: endblock: +; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length8(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length8( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq 
i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE1-LABEL: define i32 @length8( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: br label [[LOADBB:%.*]] +; X86-SSE1: res_block: +; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE1: loadbb: +; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE1: loadbb1: +; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE1-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE1: endblock: +; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE1-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE2-LABEL: define i32 @length8( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br i1 
[[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE41-LABEL: define i32 @length8( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: br label [[LOADBB:%.*]] +; X86-SSE41: res_block: +; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE41: loadbb: +; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE41: loadbb1: +; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE41: endblock: +; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE41-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, 
i32 8) nounwind + ret i32 %m +} + +define i1 @length8_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length8_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length8_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE1-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-SSE1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length8_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; 
X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length8_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length8_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length8_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408 +; X86-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[TMP3]], align 1 +; X86-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444 +; X86-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]] +; X86-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +; X86-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-NEXT: ret i1 [[TMP7]] +; +; X86-SSE1-LABEL: define i1 @length8_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408 +; X86-SSE1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1 +; X86-SSE1-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444 +; X86-SSE1-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]] +; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +; X86-SSE1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE1-NEXT: ret i1 [[TMP7]] +; +; X86-SSE2-LABEL: define i1 @length8_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408 +; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1 +; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444 +; X86-SSE2-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP7]] +; +; X86-SSE41-LABEL: define i1 @length8_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408 +; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1 +; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444 +; X86-SSE41-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]] +; 
X86-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length9_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length9_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1 +; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32 +; X86-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]] +; X86-NEXT: [[TMP16:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[TMP15]] +; X86-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +; X86-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP19]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length9_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], 
i64 4 +; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1 +; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE1-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32 +; X86-SSE1-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]] +; X86-SSE1-NEXT: [[TMP16:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE1-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[TMP15]] +; X86-SSE1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +; X86-SSE1-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32 +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP19]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length9_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE2-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32 +; X86-SSE2-NEXT: 
[[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]] +; X86-SSE2-NEXT: [[TMP16:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[TMP15]] +; X86-SSE2-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +; X86-SSE2-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP19]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length9_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1 +; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE41-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32 +; X86-SSE41-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]] +; X86-SSE41-NEXT: [[TMP16:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[TMP15]] +; X86-SSE41-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +; X86-SSE41-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32 +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP19]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length10_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length10_eq( +; X86-SAME: ptr 
[[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP10]], align 1 +; X86-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32 +; X86-NEXT: [[TMP14:%.*]] = zext i16 [[TMP12]] to i32 +; X86-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]] +; X86-NEXT: [[TMP16:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[TMP15]] +; X86-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +; X86-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP19]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length10_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE1-NEXT: 
[[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP10]], align 1 +; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32 +; X86-SSE1-NEXT: [[TMP14:%.*]] = zext i16 [[TMP12]] to i32 +; X86-SSE1-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]] +; X86-SSE1-NEXT: [[TMP16:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE1-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[TMP15]] +; X86-SSE1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +; X86-SSE1-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32 +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP19]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length10_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP10]], align 1 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32 +; X86-SSE2-NEXT: [[TMP14:%.*]] = zext i16 [[TMP12]] to i32 +; X86-SSE2-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]] +; X86-SSE2-NEXT: [[TMP16:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[TMP15]] +; X86-SSE2-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +; X86-SSE2-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP19]], 0 +; 
X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length10_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP10]], align 1 +; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32 +; X86-SSE41-NEXT: [[TMP14:%.*]] = zext i16 [[TMP12]] to i32 +; X86-SSE41-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]] +; X86-SSE41-NEXT: [[TMP16:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[TMP15]] +; X86-SSE41-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +; X86-SSE41-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i32 +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP19]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 10) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length11_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length11_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; 
X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X86-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-NEXT: [[TMP14:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-NEXT: [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP13]] +; X86-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; X86-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP17]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length11_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X86-SSE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-SSE1-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-SSE1-NEXT: [[TMP14:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE1-NEXT: [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP13]] +; X86-SSE1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; X86-SSE1-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X86-SSE1-NEXT: [[C:%.*]] = 
icmp eq i32 [[TMP17]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length11_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-SSE2-NEXT: [[TMP14:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP13]] +; X86-SSE2-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; X86-SSE2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP17]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length11_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = 
getelementptr i8, ptr [[X]], i64 7 +; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-SSE41-NEXT: [[TMP14:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP13]] +; X86-SSE41-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; X86-SSE41-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP17]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 11) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length12_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length12_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-NEXT: [[TMP14:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-NEXT: [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP13]] +; X86-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; X86-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X86-NEXT: ret i1 [[TMP16]] +; +; X86-SSE1-LABEL: define i1 @length12_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-SSE1-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-SSE1-NEXT: [[TMP14:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE1-NEXT: [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP13]] +; X86-SSE1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; X86-SSE1-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X86-SSE1-NEXT: ret i1 [[TMP16]] +; +; X86-SSE2-LABEL: define i1 @length12_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = 
load i32, ptr [[TMP10]], align 1 +; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-SSE2-NEXT: [[TMP14:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP13]] +; X86-SSE2-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; X86-SSE2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP16]] +; +; X86-SSE41-LABEL: define i1 @length12_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-SSE41-NEXT: [[TMP14:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP13]] +; X86-SSE41-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; X86-SSE41-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP16]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length12(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length12( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ 
[[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X86: loadbb2: +; X86-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1 +; X86-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]]) +; X86-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]]) +; X86-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]] +; X86-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE1-LABEL: define i32 @length12( +; 
X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: br label [[LOADBB:%.*]] +; X86-SSE1: res_block: +; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE1: loadbb: +; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE1: loadbb1: +; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X86-SSE1: loadbb2: +; X86-SSE1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-SSE1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1 +; X86-SSE1-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]]) +; X86-SSE1-NEXT: [[TMP20]] = call i32 
@llvm.bswap.i32(i32 [[TMP18]]) +; X86-SSE1-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]] +; X86-SSE1-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE1: endblock: +; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE1-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE2-LABEL: define i32 @length12( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X86-SSE2: loadbb2: +; X86-SSE2-NEXT: 
[[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-SSE2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1 +; X86-SSE2-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]]) +; X86-SSE2-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]]) +; X86-SSE2-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]] +; X86-SSE2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE41-LABEL: define i32 @length12( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: br label [[LOADBB:%.*]] +; X86-SSE41: res_block: +; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE41: loadbb: +; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE41: loadbb1: +; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; 
X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X86-SSE41: loadbb2: +; X86-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE41-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-SSE41-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1 +; X86-SSE41-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]]) +; X86-SSE41-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]]) +; X86-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]] +; X86-SSE41-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE41: endblock: +; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE41-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind + ret i32 %m +} + +define i1 @length13_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length13_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-NEXT: [[TMP16:%.*]] = load i8, ptr [[TMP14]], align 1 +; X86-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP15]], align 1 +; X86-NEXT: [[TMP18:%.*]] = zext i8 [[TMP16]] to i32 +; X86-NEXT: [[TMP19:%.*]] = zext i8 [[TMP17]] to i32 +; X86-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]] +; X86-NEXT: [[TMP21:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP20]] +; X86-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]] +; X86-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +; X86-NEXT: [[TMP25:%.*]] = zext i1 [[TMP24]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP25]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length13_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-SSE1-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-SSE1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-SSE1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-SSE1-NEXT: [[TMP16:%.*]] 
= load i8, ptr [[TMP14]], align 1 +; X86-SSE1-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP15]], align 1 +; X86-SSE1-NEXT: [[TMP18:%.*]] = zext i8 [[TMP16]] to i32 +; X86-SSE1-NEXT: [[TMP19:%.*]] = zext i8 [[TMP17]] to i32 +; X86-SSE1-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]] +; X86-SSE1-NEXT: [[TMP21:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE1-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP20]] +; X86-SSE1-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]] +; X86-SSE1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +; X86-SSE1-NEXT: [[TMP25:%.*]] = zext i1 [[TMP24]] to i32 +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP25]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length13_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-SSE2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-SSE2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-SSE2-NEXT: [[TMP16:%.*]] = load i8, ptr [[TMP14]], align 1 +; X86-SSE2-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP15]], align 1 +; X86-SSE2-NEXT: [[TMP18:%.*]] = zext i8 [[TMP16]] to i32 +; X86-SSE2-NEXT: [[TMP19:%.*]] = zext i8 
[[TMP17]] to i32 +; X86-SSE2-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]] +; X86-SSE2-NEXT: [[TMP21:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP20]] +; X86-SSE2-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]] +; X86-SSE2-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +; X86-SSE2-NEXT: [[TMP25:%.*]] = zext i1 [[TMP24]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP25]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length13_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-SSE41-NEXT: [[TMP16:%.*]] = load i8, ptr [[TMP14]], align 1 +; X86-SSE41-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP15]], align 1 +; X86-SSE41-NEXT: [[TMP18:%.*]] = zext i8 [[TMP16]] to i32 +; X86-SSE41-NEXT: [[TMP19:%.*]] = zext i8 [[TMP17]] to i32 +; X86-SSE41-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]] +; X86-SSE41-NEXT: [[TMP21:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP22:%.*]] = or i32 
[[TMP13]], [[TMP20]] +; X86-SSE41-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]] +; X86-SSE41-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +; X86-SSE41-NEXT: [[TMP25:%.*]] = zext i1 [[TMP24]] to i32 +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP25]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 13) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length14_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length14_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP14]], align 1 +; X86-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP15]], align 1 +; X86-NEXT: [[TMP18:%.*]] = zext i16 [[TMP16]] to i32 +; X86-NEXT: [[TMP19:%.*]] = zext i16 [[TMP17]] to i32 +; X86-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]] +; X86-NEXT: [[TMP21:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP20]] +; X86-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]] +; X86-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +; X86-NEXT: [[TMP25:%.*]] = 
zext i1 [[TMP24]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP25]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length14_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-SSE1-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-SSE1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-SSE1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-SSE1-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP14]], align 1 +; X86-SSE1-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP15]], align 1 +; X86-SSE1-NEXT: [[TMP18:%.*]] = zext i16 [[TMP16]] to i32 +; X86-SSE1-NEXT: [[TMP19:%.*]] = zext i16 [[TMP17]] to i32 +; X86-SSE1-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]] +; X86-SSE1-NEXT: [[TMP21:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE1-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP20]] +; X86-SSE1-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]] +; X86-SSE1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +; X86-SSE1-NEXT: [[TMP25:%.*]] = zext i1 [[TMP24]] to i32 +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP25]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length14_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-SSE2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-SSE2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-SSE2-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP14]], align 1 +; X86-SSE2-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP15]], align 1 +; X86-SSE2-NEXT: [[TMP18:%.*]] = zext i16 [[TMP16]] to i32 +; X86-SSE2-NEXT: [[TMP19:%.*]] = zext i16 [[TMP17]] to i32 +; X86-SSE2-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]] +; X86-SSE2-NEXT: [[TMP21:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP20]] +; X86-SSE2-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]] +; X86-SSE2-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +; X86-SSE2-NEXT: [[TMP25:%.*]] = zext i1 [[TMP24]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP25]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length14_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; 
X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-SSE41-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP14]], align 1 +; X86-SSE41-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP15]], align 1 +; X86-SSE41-NEXT: [[TMP18:%.*]] = zext i16 [[TMP16]] to i32 +; X86-SSE41-NEXT: [[TMP19:%.*]] = zext i16 [[TMP17]] to i32 +; X86-SSE41-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]] +; X86-SSE41-NEXT: [[TMP21:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP20]] +; X86-SSE41-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]] +; X86-SSE41-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +; X86-SSE41-NEXT: [[TMP25:%.*]] = zext i1 [[TMP24]] to i32 +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP25]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 14) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length15_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length15_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr 
[[X]], i64 4 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 11 +; X86-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 11 +; X86-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 1 +; X86-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-NEXT: [[TMP18:%.*]] = xor i32 [[TMP16]], [[TMP17]] +; X86-NEXT: [[TMP19:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-NEXT: [[TMP20:%.*]] = or i32 [[TMP13]], [[TMP18]] +; X86-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]] +; X86-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +; X86-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP23]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length15_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE1-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-SSE1-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-SSE1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 11 +; X86-SSE1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 11 +; X86-SSE1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 1 +; X86-SSE1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-SSE1-NEXT: [[TMP18:%.*]] = xor i32 [[TMP16]], [[TMP17]] +; X86-SSE1-NEXT: [[TMP19:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE1-NEXT: [[TMP20:%.*]] = or i32 [[TMP13]], [[TMP18]] +; X86-SSE1-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]] +; X86-SSE1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +; X86-SSE1-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP23]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length15_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-SSE2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 11 +; X86-SSE2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 11 +; 
X86-SSE2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 1 +; X86-SSE2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-SSE2-NEXT: [[TMP18:%.*]] = xor i32 [[TMP16]], [[TMP17]] +; X86-SSE2-NEXT: [[TMP19:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP20:%.*]] = or i32 [[TMP13]], [[TMP18]] +; X86-SSE2-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]] +; X86-SSE2-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +; X86-SSE2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP23]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length15_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 11 +; X86-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 11 +; X86-SSE41-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 1 +; X86-SSE41-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-SSE41-NEXT: [[TMP18:%.*]] = xor i32 [[TMP16]], [[TMP17]] +; X86-SSE41-NEXT: [[TMP19:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP20:%.*]] = or i32 
[[TMP13]], [[TMP18]] +; X86-SSE41-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]] +; X86-SSE41-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +; X86-SSE41-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP23]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 15) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 + +define i32 @length16(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length16( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], 
[[TMP13]] +; X86-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X86: loadbb2: +; X86-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1 +; X86-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]]) +; X86-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]]) +; X86-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]] +; X86-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X86: loadbb3: +; X86-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1 +; X86-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1 +; X86-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]]) +; X86-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]]) +; X86-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]] +; X86-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE1-LABEL: define i32 @length16( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: br label [[LOADBB:%.*]] +; X86-SSE1: res_block: +; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE1: 
loadbb: +; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE1: loadbb1: +; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X86-SSE1: loadbb2: +; X86-SSE1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-SSE1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1 +; X86-SSE1-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]]) +; X86-SSE1-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]]) +; X86-SSE1-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]] +; X86-SSE1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X86-SSE1: loadbb3: +; X86-SSE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-SSE1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-SSE1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1 +; X86-SSE1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1 +; X86-SSE1-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]]) +; X86-SSE1-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]]) +; 
X86-SSE1-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]] +; X86-SSE1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE1: endblock: +; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE1-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE2-LABEL: define i32 @length16( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; 
X86-SSE2: loadbb2: +; X86-SSE2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-SSE2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1 +; X86-SSE2-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]]) +; X86-SSE2-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]]) +; X86-SSE2-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]] +; X86-SSE2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X86-SSE2: loadbb3: +; X86-SSE2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-SSE2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-SSE2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1 +; X86-SSE2-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1 +; X86-SSE2-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]]) +; X86-SSE2-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]]) +; X86-SSE2-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]] +; X86-SSE2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE41-LABEL: define i32 @length16( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: br label [[LOADBB:%.*]] +; X86-SSE41: res_block: +; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE41-NEXT: br 
label [[ENDBLOCK:%.*]] +; X86-SSE41: loadbb: +; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE41: loadbb1: +; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X86-SSE41: loadbb2: +; X86-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE41-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-SSE41-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1 +; X86-SSE41-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]]) +; X86-SSE41-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]]) +; X86-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]] +; X86-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X86-SSE41: loadbb3: +; X86-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-SSE41-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1 +; X86-SSE41-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1 +; X86-SSE41-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]]) +; 
X86-SSE41-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]]) +; X86-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]] +; X86-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE41: endblock: +; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE41-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind + ret i32 %m +} + +define i1 @length16_eq(ptr %x, ptr %y) nounwind { +; X86-NOSSE-LABEL: length16_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %esi +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl (%edx), %esi +; X86-NOSSE-NEXT: movl 4(%edx), %eax +; X86-NOSSE-NEXT: xorl (%ecx), %esi +; X86-NOSSE-NEXT: xorl 4(%ecx), %eax +; X86-NOSSE-NEXT: orl %esi, %eax +; X86-NOSSE-NEXT: movl 8(%edx), %esi +; X86-NOSSE-NEXT: xorl 8(%ecx), %esi +; X86-NOSSE-NEXT: movl 12(%edx), %edx +; X86-NOSSE-NEXT: xorl 12(%ecx), %edx +; X86-NOSSE-NEXT: orl %esi, %edx +; X86-NOSSE-NEXT: orl %eax, %edx +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: popl %esi +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length16_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-NEXT: 
[[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 1 +; X86-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-NEXT: [[TMP18:%.*]] = xor i32 [[TMP16]], [[TMP17]] +; X86-NEXT: [[TMP19:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-NEXT: [[TMP20:%.*]] = or i32 [[TMP13]], [[TMP18]] +; X86-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]] +; X86-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +; X86-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X86-NEXT: ret i1 [[TMP22]] +; +; X86-SSE1-LABEL: define i1 @length16_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 1 +; X86-SSE1-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] +; X86-SSE1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-SSE1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-SSE1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 1 +; X86-SSE1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-SSE1-NEXT: [[TMP18:%.*]] = xor i32 [[TMP16]], [[TMP17]] +; X86-SSE1-NEXT: [[TMP19:%.*]] = or i32 
[[TMP3]], [[TMP8]] +; X86-SSE1-NEXT: [[TMP20:%.*]] = or i32 [[TMP13]], [[TMP18]] +; X86-SSE1-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]] +; X86-SSE1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +; X86-SSE1-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X86-SSE1-NEXT: ret i1 [[TMP22]] +; +; X86-SSE2-LABEL: define i1 @length16_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP3]] +; +; X86-SSE41-LABEL: define i1 @length16_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP3]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length16_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X86: loadbb2: +; X86-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1 +; X86-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]]) +; X86-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]]) +; X86-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]] +; X86-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X86: loadbb3: +; X86-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1 +; X86-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1 +; X86-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]]) +; X86-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]]) +; X86-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]] +; X86-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; 
X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length16_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: br label [[LOADBB:%.*]] +; X86-SSE1: res_block: +; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE1: loadbb: +; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE1: loadbb1: +; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X86-SSE1: loadbb2: +; X86-SSE1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE1-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[TMP15]], align 1 +; X86-SSE1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1 +; X86-SSE1-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]]) +; X86-SSE1-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]]) +; X86-SSE1-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]] +; X86-SSE1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X86-SSE1: loadbb3: +; X86-SSE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-SSE1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-SSE1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1 +; X86-SSE1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1 +; X86-SSE1-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]]) +; X86-SSE1-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]]) +; X86-SSE1-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]] +; X86-SSE1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE1: endblock: +; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length16_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], 
align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X86-SSE2: loadbb2: +; X86-SSE2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-SSE2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1 +; X86-SSE2-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]]) +; X86-SSE2-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]]) +; X86-SSE2-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]] +; X86-SSE2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X86-SSE2: loadbb3: +; X86-SSE2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-SSE2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-SSE2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1 +; X86-SSE2-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1 +; X86-SSE2-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]]) +; X86-SSE2-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]]) +; X86-SSE2-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]] +; X86-SSE2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label 
[[RES_BLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length16_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: br label [[LOADBB:%.*]] +; X86-SSE41: res_block: +; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE41: loadbb: +; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE41: loadbb1: +; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X86-SSE41: loadbb2: +; X86-SSE41-NEXT: 
[[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE41-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-SSE41-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1 +; X86-SSE41-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]]) +; X86-SSE41-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]]) +; X86-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]] +; X86-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X86-SSE41: loadbb3: +; X86-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-SSE41-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1 +; X86-SSE41-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1 +; X86-SSE41-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]]) +; X86-SSE41-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]]) +; X86-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]] +; X86-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE41: endblock: +; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length16_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] 
] +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X86: loadbb2: +; X86-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1 +; X86-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]]) +; X86-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]]) +; X86-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]] +; X86-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X86: loadbb3: +; X86-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1 +; X86-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1 +; X86-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]]) +; X86-NEXT: [[TMP27]] = call i32 
@llvm.bswap.i32(i32 [[TMP25]]) +; X86-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]] +; X86-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length16_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: br label [[LOADBB:%.*]] +; X86-SSE1: res_block: +; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE1: loadbb: +; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE1: loadbb1: +; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE1-NEXT: br 
i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X86-SSE1: loadbb2: +; X86-SSE1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-SSE1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1 +; X86-SSE1-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]]) +; X86-SSE1-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]]) +; X86-SSE1-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]] +; X86-SSE1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X86-SSE1: loadbb3: +; X86-SSE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-SSE1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-SSE1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1 +; X86-SSE1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1 +; X86-SSE1-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]]) +; X86-SSE1-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]]) +; X86-SSE1-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]] +; X86-SSE1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE1: endblock: +; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length16_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], 
[[PHI_SRC2]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X86-SSE2: loadbb2: +; X86-SSE2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-SSE2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1 +; X86-SSE2-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]]) +; X86-SSE2-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]]) +; X86-SSE2-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]] +; X86-SSE2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X86-SSE2: loadbb3: +; X86-SSE2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-SSE2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-SSE2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1 +; X86-SSE2-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1 +; 
X86-SSE2-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]]) +; X86-SSE2-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]]) +; X86-SSE2-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]] +; X86-SSE2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length16_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: br label [[LOADBB:%.*]] +; X86-SSE41: res_block: +; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE41: loadbb: +; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE41: loadbb1: +; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) 
+; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X86-SSE41: loadbb2: +; X86-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE41-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1 +; X86-SSE41-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 1 +; X86-SSE41-NEXT: [[TMP19]] = call i32 @llvm.bswap.i32(i32 [[TMP17]]) +; X86-SSE41-NEXT: [[TMP20]] = call i32 @llvm.bswap.i32(i32 [[TMP18]]) +; X86-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], [[TMP20]] +; X86-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X86-SSE41: loadbb3: +; X86-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 12 +; X86-SSE41-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1 +; X86-SSE41-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 1 +; X86-SSE41-NEXT: [[TMP26]] = call i32 @llvm.bswap.i32(i32 [[TMP24]]) +; X86-SSE41-NEXT: [[TMP27]] = call i32 @llvm.bswap.i32(i32 [[TMP25]]) +; X86-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]] +; X86-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE41: endblock: +; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_eq_const(ptr %X) nounwind { +; X86-NOSSE-LABEL: length16_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %esi +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl $858927408, %ecx # imm = 0x33323130 +; X86-NOSSE-NEXT: xorl (%eax), %ecx 
+; X86-NOSSE-NEXT: movl $926299444, %edx # imm = 0x37363534 +; X86-NOSSE-NEXT: xorl 4(%eax), %edx +; X86-NOSSE-NEXT: orl %ecx, %edx +; X86-NOSSE-NEXT: movl $825243960, %ecx # imm = 0x31303938 +; X86-NOSSE-NEXT: xorl 8(%eax), %ecx +; X86-NOSSE-NEXT: movl $892613426, %esi # imm = 0x35343332 +; X86-NOSSE-NEXT: xorl 12(%eax), %esi +; X86-NOSSE-NEXT: orl %ecx, %esi +; X86-NOSSE-NEXT: orl %edx, %esi +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: popl %esi +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length16_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408 +; X86-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1 +; X86-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444 +; X86-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 1 +; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP7]], 825243960 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP11:%.*]] = xor i32 [[TMP10]], 892613426 +; X86-NEXT: [[TMP12:%.*]] = or i32 [[TMP2]], [[TMP5]] +; X86-NEXT: [[TMP13:%.*]] = or i32 [[TMP8]], [[TMP11]] +; X86-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]] +; X86-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +; X86-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length16_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408 +; X86-SSE1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1 +; X86-SSE1-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444 +; 
X86-SSE1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 1 +; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP7]], 825243960 +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 12 +; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP11:%.*]] = xor i32 [[TMP10]], 892613426 +; X86-SSE1-NEXT: [[TMP12:%.*]] = or i32 [[TMP2]], [[TMP5]] +; X86-SSE1-NEXT: [[TMP13:%.*]] = or i32 [[TMP8]], [[TMP11]] +; X86-SSE1-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]] +; X86-SSE1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +; X86-SSE1-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length16_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length16_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 + +define i32 @length24(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length24( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5:[0-9]+]] +; X86-NEXT: 
ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length24( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5:[0-9]+]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length24( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5:[0-9]+]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length24( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5:[0-9]+]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind + ret i32 %m +} + +define i1 @length24_eq(ptr %x, ptr %y) nounwind { +; X86-NOSSE-LABEL: length24_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $24 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length24_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length24_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length24_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: 
[[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length24_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length24_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; 
X86-SSE1-LABEL: define i1 @length24_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length24_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length24_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length24_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length24_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length24_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length24_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_eq_const(ptr %X) nounwind { +; X86-NOSSE-LABEL: length24_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $24 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length24_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 24) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length24_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 24) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length24_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 68051240286688436651889234231545575736 +; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP7]] +; +; X86-SSE41-LABEL: define i1 @length24_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; 
X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 68051240286688436651889234231545575736 +; X86-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length31(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length31( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length31( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length31( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length31( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 31) nounwind + ret i32 %m +} + +define i1 @length31_eq(ptr %x, ptr %y) nounwind { +; X86-NOSSE-LABEL: length31_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $31 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; 
X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length31_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length31_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length31_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length31_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr 
[[Y]], i64 15 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length31_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length31_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length31_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length31_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 
@length31_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length31_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length31_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length31_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { +; X86-NOSSE-LABEL: length31_eq_prefer128: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $31 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length31_eq_prefer128( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 
@length31_eq_prefer128( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length31_eq_prefer128( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length31_eq_prefer128( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE41-NEXT: [[TMP11:%.*]] = zext 
i1 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_eq_const(ptr %X) nounwind { +; X86-NOSSE-LABEL: length31_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $31 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length31_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 31) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length31_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 31) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length31_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP7]] +; +; X86-SSE41-LABEL: define i1 @length31_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: 
[[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X86-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 31) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length32(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length32( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length32( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length32( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length32( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind + ret i32 %m +} + +; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 + +define i1 @length32_eq(ptr %x, ptr %y) nounwind { +; X86-NOSSE-LABEL: length32_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $32 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, 
%esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length32_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length32_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length32_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length32_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; 
X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length32_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length32_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length32_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length32_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length32_gt( +; 
X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length32_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length32_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length32_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { +; X86-NOSSE-LABEL: length32_eq_prefer128: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $32 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length32_eq_prefer128( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length32_eq_prefer128( +; X86-SSE1-SAME: 
ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length32_eq_prefer128( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length32_eq_prefer128( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq 
i32 [[TMP11]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_const(ptr %X) nounwind { +; X86-NOSSE-LABEL: length32_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $32 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length32_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 32) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length32_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 32) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length32_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP7]] +; +; X86-SSE41-LABEL: define i1 @length32_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 
70720121592765328381466889075544961328 +; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X86-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length48(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length48( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length48( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length48( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length48( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 48) nounwind + ret i32 %m +} + +define i1 @length48_eq(ptr %x, ptr %y) nounwind { +; X86-NOSSE-LABEL: length48_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $48 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl 
+; +; X86-LABEL: define i1 @length48_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length48_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length48_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X86-SSE2-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X86-SSE2-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X86-SSE2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length48_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = 
load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X86-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X86-SSE41-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X86-SSE41-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length48_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length48_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length48_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length48_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length48_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length48_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length48_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length48_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind 
"prefer-vector-width"="128" { +; X86-NOSSE-LABEL: length48_eq_prefer128: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $48 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length48_eq_prefer128( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length48_eq_prefer128( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length48_eq_prefer128( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X86-SSE2-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: 
[[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X86-SSE2-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X86-SSE2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length48_eq_prefer128( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X86-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X86-SSE41-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X86-SSE41-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_eq_const(ptr %X) nounwind { +; X86-NOSSE-LABEL: length48_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $48 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: 
testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length48_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 48) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length48_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 48) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length48_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X86-SSE2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690 +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = or i128 [[TMP9]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP11:%.*]] = icmp ne i128 [[TMP10]], 0 +; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP11]] +; +; X86-SSE41-LABEL: define i1 @length48_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE41-NEXT: [[TMP5:%.*]] = 
xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X86-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690 +; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE41-NEXT: [[TMP10:%.*]] = or i128 [[TMP9]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP11:%.*]] = icmp ne i128 [[TMP10]], 0 +; X86-SSE41-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP11]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 48) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length63(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length63( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length63( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length63( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length63( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 63) nounwind + ret i32 %m +} + +define i1 @length63_eq(ptr %x, ptr %y) nounwind { +; X86-NOSSE-LABEL: length63_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $63 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; 
X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length63_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length63_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length63_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X86-SSE2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 47 +; X86-SSE2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 47 +; X86-SSE2-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1 +; X86-SSE2-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1 +; X86-SSE2-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]] +; X86-SSE2-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], 
[[TMP8]] +; X86-SSE2-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]] +; X86-SSE2-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]] +; X86-SSE2-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0 +; X86-SSE2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP22]] +; +; X86-SSE41-LABEL: define i1 @length63_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X86-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 47 +; X86-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 47 +; X86-SSE41-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1 +; X86-SSE41-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1 +; X86-SSE41-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]] +; X86-SSE41-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]] +; X86-SSE41-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]] +; X86-SSE41-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0 +; X86-SSE41-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP22]] +; + %call = tail call i32 @memcmp(ptr 
%x, ptr %y, i32 63) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length63_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length63_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length63_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length63_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length63_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length63_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 
[[CMP]] +; +; X86-SSE2-LABEL: define i1 @length63_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length63_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_eq_const(ptr %X) nounwind { +; X86-NOSSE-LABEL: length63_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $63 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length63_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 63) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length63_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 63) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length63_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; 
X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X86-SSE2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 47 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 66716800424378146251538984255488604215 +; X86-SSE2-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE2-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]] +; X86-SSE2-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0 +; X86-SSE2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length63_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X86-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 47 +; X86-SSE41-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 66716800424378146251538984255488604215 +; X86-SSE41-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE41-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], 
[[TMP11]] +; X86-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0 +; X86-SSE41-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 63) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length64(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length64( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length64( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length64( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length64( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind + ret i32 %m +} + +define i1 @length64_eq(ptr %x, ptr %y) nounwind { +; X86-NOSSE-LABEL: length64_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $64 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length64_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; 
X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length64_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length64_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X86-SSE2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X86-SSE2-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X86-SSE2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; X86-SSE2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 48 +; X86-SSE2-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1 +; X86-SSE2-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1 +; X86-SSE2-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]] +; X86-SSE2-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]] +; X86-SSE2-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]] +; X86-SSE2-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0 +; X86-SSE2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X86-SSE2-NEXT: ret 
i1 [[TMP22]] +; +; X86-SSE41-LABEL: define i1 @length64_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X86-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X86-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X86-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; X86-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 48 +; X86-SSE41-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1 +; X86-SSE41-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1 +; X86-SSE41-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]] +; X86-SSE41-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]] +; X86-SSE41-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]] +; X86-SSE41-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0 +; X86-SSE41-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP22]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length64_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr 
[[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length64_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length64_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length64_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length64_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length64_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length64_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; 
X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length64_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_eq_const(ptr %X) nounwind { +; X86-NOSSE-LABEL: length64_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $64 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length64_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length64_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length64_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X86-SSE2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 
[[TMP7]], 73389002901949112059321871464991568690 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 68051240286688436651889234231545575736 +; X86-SSE2-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE2-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]] +; X86-SSE2-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0 +; X86-SSE2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length64_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X86-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; X86-SSE41-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 68051240286688436651889234231545575736 +; X86-SSE41-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE41-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]] +; X86-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0 +; X86-SSE41-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X86-SSE41-NEXT: 
ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length96(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length96( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length96( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length96( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length96( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 96) nounwind + ret i32 %m +} + +define i1 @length96_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length96_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length96_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length96_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = 
icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length96_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length96_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length96_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length96_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length96_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length96_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr 
[[Y]], i32 96) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length96_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length96_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length96_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length96_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 96) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length96_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 96) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length96_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 96) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 
@length96_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 96) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 96) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length127(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length127( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length127( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length127( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length127( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 127) nounwind + ret i32 %m +} + +define i1 @length127_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length127_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length127_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret 
i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length127_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length127_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length127_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length127_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length127_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length127_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind 
+ %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length127_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length127_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length127_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length127_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length127_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 127) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length127_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 127) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 
@length127_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 127) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length127_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 127) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 127) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length128(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length128( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length128( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length128( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length128( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 128) nounwind + ret i32 %m +} + +define i1 @length128_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length128_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; 
+; X86-SSE1-LABEL: define i1 @length128_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length128_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length128_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length128_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length128_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length128_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length128_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length128_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length128_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length128_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length128_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length128_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 128) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 
@length128_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 128) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length128_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 128) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length128_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 128) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 128) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length192(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length192( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length192( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length192( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length192( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 192) nounwind + ret i32 %m +} + +define i1 @length192_eq(ptr %x, ptr %y) 
nounwind { +; X86-LABEL: define i1 @length192_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length192_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length192_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length192_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length192_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length192_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length192_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length192_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length192_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length192_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length192_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length192_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_eq_const(ptr %X) nounwind { +; 
X86-LABEL: define i1 @length192_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 192) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length192_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 192) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length192_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 192) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length192_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 192) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 192) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length255(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length255( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length255( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length255( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length255( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 255) nounwind + ret i32 %m +} + +define i1 @length255_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length255_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length255_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length255_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length255_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length255_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length255_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length255_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length255_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length255_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length255_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length255_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length255_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 
255) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length255_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 255) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length255_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 255) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length255_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 255) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length255_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 255) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 255) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length256(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length256( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length256( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 
@length256( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length256( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 256) nounwind + ret i32 %m +} + +define i1 @length256_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length256_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length256_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length256_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length256_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length256_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length256_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length256_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length256_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length256_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length256_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length256_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; 
X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length256_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length256_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 256) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length256_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 256) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length256_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 256) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length256_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 256) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 256) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length384(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length384( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-NEXT: ret i32 
[[M]] +; +; X86-SSE1-LABEL: define i32 @length384( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length384( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length384( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 384) nounwind + ret i32 %m +} + +define i1 @length384_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length384_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length384_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length384_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length384_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + 
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length384_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length384_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length384_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length384_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length384_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length384_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = 
icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length384_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length384_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length384_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 384) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length384_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 384) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length384_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 384) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length384_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 384) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 384) nounwind + %c = icmp eq i32 %m, 0 + ret 
i1 %c +} + +define i32 @length511(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length511( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length511( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length511( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length511( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 511) nounwind + ret i32 %m +} + +define i1 @length511_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length511_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length511_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length511_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 
@length511_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length511_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length511_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length511_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length511_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length511_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length511_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length511_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length511_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length511_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 511) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length511_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 511) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length511_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 511) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length511_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; 
X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 511) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 511) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length512(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length512( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length512( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length512( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length512( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 512) nounwind + ret i32 %m +} + +define i1 @length512_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length512_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length512_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length512_eq( +; 
X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length512_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length512_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length512_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length512_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length512_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 
@length512_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length512_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length512_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length512_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length512_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length512_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 512) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length512_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 512) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length512_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; 
X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 512) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length512_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 512) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 512) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; This checks that we do not do stupid things with huge sizes. +define i32 @huge_length(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @huge_length( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @huge_length( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @huge_length( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @huge_length( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9223372036854775807) nounwind + ret i32 %m +} + +define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @huge_length_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 
[[C]] +; +; X86-SSE1-LABEL: define i1 @huge_length_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @huge_length_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @huge_length_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9223372036854775807) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; This checks non-constant sizes. 
+define i32 @nonconst_length(ptr %X, ptr %Y, i32 %size) nounwind { +; X86-LABEL: define i32 @nonconst_length( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @nonconst_length( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @nonconst_length( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @nonconst_length( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 %size) nounwind + ret i32 %m +} + +define i1 @nonconst_length_eq(ptr %X, ptr %Y, i32 %size) nounwind { +; X86-LABEL: define i1 @nonconst_length_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @nonconst_length_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @nonconst_length_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: 
[[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @nonconst_length_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 %size) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-more-load-pairs.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-more-load-pairs.ll new file mode 100644 index 0000000000000..56489a08800b7 --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-more-load-pairs.ll @@ -0,0 +1,18833 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; NOTE: This is a copy of llvm/test/CodeGen/X86/memcmp.ll with more load pairs. Please keep it that way. 
+; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=X64 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=sse4.1 < %s | FileCheck %s --check-prefixes=X64-SSE41 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefixes=X64-AVX1 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx2 < %s | FileCheck %s --check-prefixes=X64-AVX2 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit < %s | FileCheck %s --check-prefixes=X64-AVX512BW-256 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit < %s | FileCheck %s --check-prefixes=X64-AVX512BW +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,-prefer-mask-registers < %s | FileCheck %s --check-prefixes=X64-AVX512F-256 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,-prefer-mask-registers < %s | FileCheck %s --check-prefixes=X64-AVX512F +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,+prefer-mask-registers < %s | FileCheck %s --check-prefixes=X64-MIC-AVX2 +; RUN: opt -S -passes=expand-memcmp -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,+prefer-mask-registers < %s | FileCheck 
%s --check-prefixes=X64-MIC-AVX512F + +; This tests codegen time inlining/optimization of memcmp +; rdar://6480398 + +@.str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1 + +declare dso_local i32 @memcmp(ptr, ptr, i64) + +define i32 @length0(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length0( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0:[0-9]+]] { +; X64-NEXT: ret i32 0 +; +; X64-SSE41-LABEL: define i32 @length0( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-SSE41-NEXT: ret i32 0 +; +; X64-AVX1-LABEL: define i32 @length0( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-AVX1-NEXT: ret i32 0 +; +; X64-AVX2-LABEL: define i32 @length0( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-AVX2-NEXT: ret i32 0 +; +; X64-AVX512BW-256-LABEL: define i32 @length0( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-AVX512BW-256-NEXT: ret i32 0 +; +; X64-AVX512BW-LABEL: define i32 @length0( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-AVX512BW-NEXT: ret i32 0 +; +; X64-AVX512F-256-LABEL: define i32 @length0( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-AVX512F-256-NEXT: ret i32 0 +; +; X64-AVX512F-LABEL: define i32 @length0( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-AVX512F-NEXT: ret i32 0 +; +; X64-MIC-AVX2-LABEL: define i32 @length0( +; 
X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-MIC-AVX2-NEXT: ret i32 0 +; +; X64-MIC-AVX512F-LABEL: define i32 @length0( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-MIC-AVX512F-NEXT: ret i32 0 +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind + ret i32 %m + } + +define i1 @length0_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length0_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: ret i1 true +; +; X64-SSE41-LABEL: define i1 @length0_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: ret i1 true +; +; X64-AVX1-LABEL: define i1 @length0_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: ret i1 true +; +; X64-AVX2-LABEL: define i1 @length0_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: ret i1 true +; +; X64-AVX512BW-256-LABEL: define i1 @length0_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: ret i1 true +; +; X64-AVX512BW-LABEL: define i1 @length0_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: ret i1 true +; +; X64-AVX512F-256-LABEL: define i1 @length0_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: ret i1 true +; +; X64-AVX512F-LABEL: define i1 @length0_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: ret i1 true +; +; X64-MIC-AVX2-LABEL: define i1 @length0_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: ret i1 true +; +; X64-MIC-AVX512F-LABEL: define i1 @length0_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: ret i1 true +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length0_lt(ptr %X, ptr 
%Y) nounwind { +; X64-LABEL: define i1 @length0_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: ret i1 false +; +; X64-SSE41-LABEL: define i1 @length0_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: ret i1 false +; +; X64-AVX1-LABEL: define i1 @length0_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: ret i1 false +; +; X64-AVX2-LABEL: define i1 @length0_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: ret i1 false +; +; X64-AVX512BW-256-LABEL: define i1 @length0_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: ret i1 false +; +; X64-AVX512BW-LABEL: define i1 @length0_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: ret i1 false +; +; X64-AVX512F-256-LABEL: define i1 @length0_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: ret i1 false +; +; X64-AVX512F-LABEL: define i1 @length0_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: ret i1 false +; +; X64-MIC-AVX2-LABEL: define i1 @length0_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: ret i1 false +; +; X64-MIC-AVX512F-LABEL: define i1 @length0_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: ret i1 false +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length2(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length2( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] 
= zext i16 [[TMP3]] to i32 +; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-NEXT: ret i32 [[TMP7]] +; +; X64-SSE41-LABEL: define i32 @length2( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: ret i32 [[TMP7]] +; +; X64-AVX1-LABEL: define i32 @length2( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: ret i32 [[TMP7]] +; +; X64-AVX2-LABEL: define i32 @length2( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: ret i32 [[TMP7]] +; +; X64-AVX512BW-256-LABEL: define i32 @length2( +; 
X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: ret i32 [[TMP7]] +; +; X64-AVX512BW-LABEL: define i32 @length2( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: ret i32 [[TMP7]] +; +; X64-AVX512F-256-LABEL: define i32 @length2( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: ret i32 [[TMP7]] +; +; X64-AVX512F-LABEL: define i32 @length2( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: ret i32 [[TMP7]] +; +; X64-MIC-AVX2-LABEL: define i32 @length2( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: ret i32 [[TMP7]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length2( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: ret i32 [[TMP7]] +; + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + ret i32 %m +} + +define i1 @length2_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length2_eq( +; X64-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length2_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length2_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length2_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length2_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: 
[[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length2_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length2_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length2_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length2_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; 
X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length2_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length2_lt(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length2_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length2_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length2_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], 
ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length2_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length2_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length2_lt( +; X64-AVX512BW-SAME: ptr 
[[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length2_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length2_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; 
X64-MIC-AVX2-LABEL: define i1 @length2_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length2_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_gt(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length2_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-NEXT: [[TMP6:%.*]] = zext i16 
[[TMP4]] to i32 +; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length2_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length2_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length2_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: 
[[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length2_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length2_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length2_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = zext 
i16 [[TMP4]] to i32 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length2_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length2_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length2_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext 
i16 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_const(ptr %X) nounwind { +; X64-LABEL: define i1 @length2_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-NEXT: ret i1 [[TMP2]] +; +; X64-SSE41-LABEL: define i1 @length2_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP2]] +; +; X64-AVX1-LABEL: define i1 @length2_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP2]] +; +; X64-AVX2-LABEL: define i1 @length2_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512BW-256-LABEL: define i1 @length2_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 
[[TMP2]] +; +; X64-AVX512BW-LABEL: define i1 @length2_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512F-256-LABEL: define i1 @length2_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512F-LABEL: define i1 @length2_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP2]] +; +; X64-MIC-AVX2-LABEL: define i1 @length2_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP2]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length2_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP2]] +; + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 
@length2_eq_nobuiltin_attr( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = 
tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length3(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length3( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], 
label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-NEXT: br label [[ENDBLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length3( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br label [[ENDBLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: 
[[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length3( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br label [[ENDBLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length3( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], 
align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br label [[ENDBLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length3( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr 
i8, ptr [[X]], i64 2 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length3( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = sub i32 
[[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br label [[ENDBLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length3( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-LABEL: define i32 @length3( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br label [[ENDBLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define i32 @length3( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i16 
@llvm.bswap.i16(i16 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length3( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = 
getelementptr i8, ptr [[Y]], i64 2 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + +; X64-SSE2: res_block: + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: loadbb1: + + + + + + + + +; X64-SSE2: endblock: + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind + ret i32 %m +} + +define i1 @length3_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length3_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: ret i1 [[TMP12]] +; +; X64-SSE41-LABEL: define i1 @length3_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-SSE41-NEXT: 
[[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-SSE41-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP12]] +; +; X64-AVX1-LABEL: define i1 @length3_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP12]] +; +; X64-AVX2-LABEL: define i1 @length3_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr 
[[X]], i64 2 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512BW-256-LABEL: define i1 @length3_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512BW-LABEL: define i1 @length3_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; 
X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512F-256-LABEL: define i1 @length3_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512F-LABEL: define i1 @length3_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; 
X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP12]] +; +; X64-MIC-AVX2-LABEL: define i1 @length3_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP12]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length3_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i16, ptr 
[[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP12]] +; + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length4(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length4( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-NEXT: ret i32 [[TMP9]] +; +; X64-SSE41-LABEL: define i32 @length4( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-SSE41-NEXT: [[TMP4:%.*]] = call i32 
@llvm.bswap.i32(i32 [[TMP2]]) +; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-SSE41-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-SSE41-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-SSE41-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-SSE41-NEXT: ret i32 [[TMP9]] +; +; X64-AVX1-LABEL: define i32 @length4( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX1-NEXT: ret i32 [[TMP9]] +; +; X64-AVX2-LABEL: define i32 @length4( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX2-NEXT: ret i32 [[TMP9]] +; +; X64-AVX512BW-256-LABEL: define i32 @length4( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; 
X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX512BW-256-NEXT: ret i32 [[TMP9]] +; +; X64-AVX512BW-LABEL: define i32 @length4( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX512BW-NEXT: ret i32 [[TMP9]] +; +; X64-AVX512F-256-LABEL: define i32 @length4( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; 
X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX512F-256-NEXT: ret i32 [[TMP9]] +; +; X64-AVX512F-LABEL: define i32 @length4( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX512F-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX512F-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX512F-NEXT: ret i32 [[TMP9]] +; +; X64-MIC-AVX2-LABEL: define i32 @length4( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-MIC-AVX2-NEXT: ret i32 [[TMP9]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length4( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i32 
@llvm.bswap.i32(i32 [[TMP1]]) +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: ret i32 [[TMP9]] +; + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + ret i32 %m +} + +define i1 @length4_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length4_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: ret i1 [[TMP3]] +; +; X64-SSE41-LABEL: define i1 @length4_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP3]] +; +; X64-AVX1-LABEL: define i1 @length4_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP3]] +; +; X64-AVX2-LABEL: define i1 @length4_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 
+; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512BW-256-LABEL: define i1 @length4_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512BW-LABEL: define i1 @length4_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512F-256-LABEL: define i1 @length4_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512F-LABEL: define i1 @length4_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP3]] +; +; X64-MIC-AVX2-LABEL: define i1 @length4_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = 
load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP3]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length4_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP3]] +; + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length4_lt(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length4_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-NEXT: ret i1 [[TMP5]] +; +; X64-SSE41-LABEL: define i1 @length4_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-SSE41-NEXT: ret i1 [[TMP5]] +; +; X64-AVX1-LABEL: define i1 @length4_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], 
align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: ret i1 [[TMP5]] +; +; X64-AVX2-LABEL: define i1 @length4_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: ret i1 [[TMP5]] +; +; X64-AVX512BW-256-LABEL: define i1 @length4_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX512BW-256-NEXT: ret i1 [[TMP5]] +; +; X64-AVX512BW-LABEL: define i1 @length4_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX512BW-NEXT: ret i1 [[TMP5]] +; +; X64-AVX512F-256-LABEL: define i1 @length4_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX512F-256-NEXT: ret i1 [[TMP5]] +; +; X64-AVX512F-LABEL: define i1 @length4_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX512F-NEXT: ret i1 [[TMP5]] +; +; X64-MIC-AVX2-LABEL: define i1 @length4_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-MIC-AVX2-NEXT: ret i1 [[TMP5]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length4_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP5]] +; + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i1 @length4_gt(ptr %X, ptr %Y) nounwind { 
+; X64-LABEL: define i1 @length4_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-NEXT: ret i1 [[TMP5]] +; +; X64-SSE41-LABEL: define i1 @length4_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-SSE41-NEXT: ret i1 [[TMP5]] +; +; X64-AVX1-LABEL: define i1 @length4_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: ret i1 [[TMP5]] +; +; X64-AVX2-LABEL: define i1 @length4_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: ret i1 [[TMP5]] +; +; X64-AVX512BW-256-LABEL: define i1 @length4_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX512BW-256-NEXT: ret i1 [[TMP5]] +; +; X64-AVX512BW-LABEL: define i1 @length4_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX512BW-NEXT: ret i1 [[TMP5]] +; +; X64-AVX512F-256-LABEL: define i1 @length4_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX512F-256-NEXT: ret i1 [[TMP5]] +; +; X64-AVX512F-LABEL: define i1 @length4_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX512F-NEXT: ret i1 [[TMP5]] +; +; X64-MIC-AVX2-LABEL: define i1 @length4_gt( +; 
X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-MIC-AVX2-NEXT: ret i1 [[TMP5]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length4_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP5]] +; + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length4_eq_const(ptr %X) nounwind { +; X64-LABEL: define i1 @length4_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length4_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length4_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length4_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length4_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length4_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length4_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length4_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; 
X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length4_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length4_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length5(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length5( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: 
[[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-NEXT: br label [[ENDBLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length5( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br label [[ENDBLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ 
[[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length5( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br label [[ENDBLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length5( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br label [[ENDBLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length5( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512BW-256-NEXT: 
[[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length5( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br 
label [[ENDBLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length5( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-LABEL: define i32 @length5( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: 
res_block: +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br label [[ENDBLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define i32 @length5( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] 
= call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length5( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-MIC-AVX512F-NEXT: 
[[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + +; X64-SSE2: res_block: + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: loadbb1: + + + + + + + + +; X64-SSE2: endblock: + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + ret i32 %m +} + +define i1 @length5_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length5_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: ret i1 [[TMP12]] +; +; X64-SSE41-LABEL: define i1 @length5_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: 
[[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-SSE41-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP12]] +; +; X64-AVX1-LABEL: define i1 @length5_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP12]] +; +; X64-AVX2-LABEL: define i1 @length5_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, 
ptr [[Y]], i64 4 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512BW-256-LABEL: define i1 @length5_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512BW-LABEL: define i1 @length5_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; 
X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512F-256-LABEL: define i1 @length5_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512F-LABEL: define i1 @length5_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; 
X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP12]] +; +; X64-MIC-AVX2-LABEL: define i1 @length5_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP12]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length5_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i32 
[[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP12]] +; + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length5_lt(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length5_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-NEXT: [[TMP13:%.*]] = zext i8 
[[TMP11]] to i32 +; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-NEXT: br label [[ENDBLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length5_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br label [[ENDBLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length5_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: 
res_block: +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br label [[ENDBLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length5_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX2-NEXT: 
[[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br label [[ENDBLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length5_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; 
X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length5_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br label [[ENDBLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: 
[[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length5_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length5_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br label [[ENDBLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length5_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length5_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; 
X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + +; X64-SSE2: res_block: + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: loadbb1: + + + + + + + + +; X64-SSE2: endblock: + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length7(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length7( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; 
X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length7( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i32 
@llvm.bswap.i32(i32 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length7( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; 
X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length7( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length7( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label 
[[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length7( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: 
[[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length7( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ 
[[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-LABEL: define i32 @length7( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 
[[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define i32 @length7( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; 
X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length7( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; 
X64-MIC-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + +; X64-SSE2: res_block: + + + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: loadbb1: + + + + + + + + +; X64-SSE2: endblock: + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind + ret i32 %m +} + +define i1 @length7_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length7_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] 
+; X64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: ret i1 [[TMP10]] +; +; X64-SSE41-LABEL: define i1 @length7_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP10]] +; +; X64-AVX1-LABEL: define i1 @length7_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP10]] +; +; X64-AVX2-LABEL: define i1 @length7_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512BW-256-LABEL: define i1 @length7_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512BW-LABEL: define i1 @length7_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = 
getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512F-256-LABEL: define i1 @length7_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512F-LABEL: define i1 @length7_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: 
[[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP10]] +; +; X64-MIC-AVX2-LABEL: define i1 @length7_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP10]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length7_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: 
[[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP10]] +; + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length7_lt(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length7_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length7_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length7_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: 
[[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length7_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 
+; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length7_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; 
X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length7_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length7_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i32 
@llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length7_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i32 
@llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length7_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 
[[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length7_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; 
X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + +; X64-SSE2: res_block: + + + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: loadbb1: + + + + + + + + +; X64-SSE2: endblock: + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length8(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length8( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-NEXT: ret i32 [[TMP9]] +; +; X64-SSE41-LABEL: define i32 @length8( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: 
[[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-SSE41-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-SSE41-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-SSE41-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-SSE41-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-SSE41-NEXT: ret i32 [[TMP9]] +; +; X64-AVX1-LABEL: define i32 @length8( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX1-NEXT: ret i32 [[TMP9]] +; +; X64-AVX2-LABEL: define i32 @length8( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX2-NEXT: ret i32 [[TMP9]] +; +; X64-AVX512BW-256-LABEL: define i32 
@length8( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX512BW-256-NEXT: ret i32 [[TMP9]] +; +; X64-AVX512BW-LABEL: define i32 @length8( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX512BW-NEXT: ret i32 [[TMP9]] +; +; X64-AVX512F-256-LABEL: define i32 @length8( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = icmp ugt i64 
[[TMP3]], [[TMP4]] +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX512F-256-NEXT: ret i32 [[TMP9]] +; +; X64-AVX512F-LABEL: define i32 @length8( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-AVX512F-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-AVX512F-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-AVX512F-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX512F-NEXT: ret i32 [[TMP9]] +; +; X64-MIC-AVX2-LABEL: define i32 @length8( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-MIC-AVX2-NEXT: ret i32 [[TMP9]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length8( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load 
i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: ret i32 [[TMP9]] +; + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind + ret i32 %m +} + +define i1 @length8_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length8_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length8_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length8_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: 
[[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length8_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length8_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length8_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length8_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 
@length8_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length8_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length8_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length8_eq_const(ptr %X) nounwind { +; X64-LABEL: define i1 @length8_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-NEXT: ret i1 [[TMP2]] +; +; X64-SSE41-LABEL: define i1 @length8_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP2]] +; +; X64-AVX1-LABEL: define i1 @length8_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP2]] +; +; X64-AVX2-LABEL: define i1 @length8_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512BW-256-LABEL: define i1 @length8_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512BW-LABEL: define i1 @length8_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512F-256-LABEL: define i1 @length8_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512F-LABEL: define i1 @length8_eq_const( +; X64-AVX512F-SAME: 
ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP2]] +; +; X64-MIC-AVX2-LABEL: define i1 @length8_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP2]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length8_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP2]] +; + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length9_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length9_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: 
[[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length9_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-SSE41-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length9_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: 
[[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length9_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length9_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = 
icmp ne i64 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length9_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length9_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to 
i64 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length9_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length9_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], 
align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length9_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length10_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length10_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; 
X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length10_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-SSE41-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length10_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], 
[[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length10_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length10_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; 
X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length10_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length10_eq( +; 
X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length10_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext 
i1 [[TMP12]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length10_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length10_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; 
X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length11_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length11_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length11_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: 
[[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length11_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length11_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length11_eq( 
+; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length11_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length11_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length11_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length11_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; 
X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length11_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length12_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length12_eq( +; X64-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: ret i1 [[TMP12]] +; +; X64-SSE41-LABEL: define i1 @length12_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-SSE41-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP12]] +; +; X64-AVX1-LABEL: define i1 @length12_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; 
X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP12]] +; +; X64-AVX2-LABEL: define i1 @length12_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512BW-256-LABEL: define i1 @length12_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; 
X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512BW-LABEL: define i1 @length12_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512F-256-LABEL: define i1 @length12_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr 
[[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512F-LABEL: define i1 @length12_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP12]] +; +; X64-MIC-AVX2-LABEL: define i1 @length12_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; 
X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP12]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length12_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP12]] +; + + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind + %c = icmp ne 
i32 %m, 0 + ret i1 %c +} + +define i32 @length12(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length12( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length12( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], 
[[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-SSE41-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-SSE41-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-SSE41-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length12( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ 
[[TMP15:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-AVX1-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-AVX1-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-AVX1-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length12( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 
-1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-AVX2-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-AVX2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-AVX2-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length12( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; 
X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-AVX512BW-256-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length12( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label 
[[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-AVX512BW-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-AVX512BW-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length12( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label 
[[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-AVX512F-256-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-AVX512F-256-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-LABEL: define i32 @length12( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; 
X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-AVX512F-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-AVX512F-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define i32 @length12( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: 
loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-MIC-AVX2-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length12( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; 
X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-MIC-AVX512F-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + +; X64-SSE2: res_block: + + + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: loadbb1: + + + + + + + + + + +; X64-SSE2: endblock: + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind + ret i32 %m +} + +define i1 @length13_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length13_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-NEXT: 
[[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length13_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length13_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: 
[[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length13_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length13_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext 
i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length13_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length13_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 
[[TMP11]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length13_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length13_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length13_eq( +; X64-MIC-AVX512F-SAME: ptr 
[[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 13) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length14_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length14_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length14_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { 
+; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length14_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length14_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = 
getelementptr i8, ptr [[X]], i64 6 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length14_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length14_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr 
[[Y]], i64 6 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length14_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length14_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; 
X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length14_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length14_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: 
[[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 14) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length15_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @length15_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length15_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i64 
[[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length15_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length15_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; 
X64-AVX512BW-256-LABEL: define i1 @length15_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length15_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length15_eq( +; 
X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length15_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length15_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: 
[[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length15_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + + + + + + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 + +define i32 
@length16(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length16( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length16( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], 
[[LOADBB1]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length16( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load 
i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length16( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], 
[[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length16( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: 
[[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length16( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; 
X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length16( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = 
load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-LABEL: define i32 @length16( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 
[[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define i32 @length16( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; 
X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length16( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] 
], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + +; X64-SSE2: res_block: + + + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: loadbb1: + + + + + + + + +; X64-SSE2: endblock: + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind + ret i32 %m +} + +define i1 @length16_eq(ptr %x, ptr %y) nounwind { +; +; X64-LABEL: define i1 @length16_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: ret i1 [[TMP3]] +; +; X64-SSE41-LABEL: define i1 @length16_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP3]] +; +; X64-AVX1-LABEL: define i1 @length16_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP3]] +; +; X64-AVX2-LABEL: define i1 @length16_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512BW-256-LABEL: define i1 @length16_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { 
+; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512BW-LABEL: define i1 @length16_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512F-256-LABEL: define i1 @length16_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512F-LABEL: define i1 @length16_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP3]] +; +; X64-MIC-AVX2-LABEL: define i1 @length16_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 
[[TMP3]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length16_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP3]] +; +; X64-AVX-LABEL: length16_eq: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: setne %al +; X64-AVX-NEXT: retq +; X64-MIC-AVX-LABEL: length16_eq: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm1 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 +; X64-MIC-AVX-NEXT: kortestw %k0, %k0 +; X64-MIC-AVX-NEXT: setne %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length16_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] 
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length16_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load 
i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length16_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 
@llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length16_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: 
[[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length16_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: 
[[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length16_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: 
[[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length16_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt 
i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length16_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length16_lt( +; X64-MIC-AVX2-SAME: ptr 
[[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length16_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; 
X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + +; X64-SSE2: res_block: + + + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: loadbb1: + + + + + + + + +; X64-SSE2: endblock: + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 
16) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length16_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length16_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ 
[[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length16_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], 
[[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length16_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: 
[[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length16_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; 
X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length16_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 
@llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length16_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: 
[[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length16_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 
[[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length16_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = 
getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length16_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; 
X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + +; X64-SSE2: res_block: + + + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: loadbb1: + + + + + + + + +; X64-SSE2: endblock: + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_eq_const(ptr %X) nounwind { +; +; X64-LABEL: define i1 @length16_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length16_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length16_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 
+; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length16_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length16_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length16_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length16_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 
@length16_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length16_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length16_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; +; X64-AVX-LABEL: length16_eq_const: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq +; X64-MIC-AVX-LABEL: length16_eq_const: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 +; X64-MIC-AVX-NEXT: kortestw %k0, %k0 +; X64-MIC-AVX-NEXT: sete %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; PR33914 
- https://bugs.llvm.org/show_bug.cgi?id=33914 + +define i32 @length24(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length24( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 
[[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length24( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = 
getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length24( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: 
[[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length24( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: 
[[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length24( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], 
[[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, 
[[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length24( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; 
X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length24( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load 
i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-LABEL: define i32 @length24( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: 
[[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define i32 @length24( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], 
[ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: br i1 
[[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length24( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label 
[[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + +; X64-SSE2: res_block: + + + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: loadbb1: + + + + + + + + +; X64-SSE2: loadbb2: + + + + + + + + +; X64-SSE2: endblock: + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind + ret i32 %m +} + +define i1 @length24_eq(ptr %x, ptr %y) nounwind { +; +; X64-LABEL: define i1 @length24_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] 
to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length24_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-SSE41-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-SSE41-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length24_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-AVX1-NEXT: 
[[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length24_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length24_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; 
X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length24_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length24_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i64, ptr 
[[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length24_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length24_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: 
[[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length24_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-AVX-LABEL: length24_eq: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero 
+; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq +; X64-MIC-AVX-LABEL: length24_eq: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm1 +; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm2, %k0 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: sete %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length24_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: 
[[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length24_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 
@llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length24_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], 
[[LOADBB2]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: 
define i1 @length24_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX2-NEXT: 
[[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length24_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 
[[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length24_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: 
[[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length24_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], 
[ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 
[[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length24_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] 
= icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length24_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; 
X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length24_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ 
[[TMP20:%.*]], [[LOADBB2]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: 
endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + +; X64-SSE2: res_block: + + + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: loadbb1: + + + + + + + + +; X64-SSE2: loadbb2: + + + + + + + + +; X64-SSE2: endblock: + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length24_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] 
+; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length24_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = 
getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length24_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: 
[[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length24_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; 
X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], 
[[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length24_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: 
[[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length24_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; 
X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length24_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; 
X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; 
X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length24_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; 
X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length24_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr 
[[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length24_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load 
i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + +; X64-SSE2: res_block: + + + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: 
loadbb1: + + + + + + + + +; X64-SSE2: loadbb2: + + + + + + + + +; X64-SSE2: endblock: + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_eq_const(ptr %X) nounwind { +; +; X64-LABEL: define i1 @length24_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-NEXT: ret i1 [[TMP8]] +; +; X64-SSE41-LABEL: define i1 @length24_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-SSE41-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-SSE41-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-SSE41-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-SSE41-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-SSE41-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP8]] +; +; X64-AVX1-LABEL: define i1 @length24_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: 
[[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-AVX1-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-AVX1-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-AVX1-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP8]] +; +; X64-AVX2-LABEL: define i1 @length24_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-AVX2-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-AVX2-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP8]] +; +; X64-AVX512BW-256-LABEL: define i1 @length24_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP8]] +; +; X64-AVX512BW-LABEL: define i1 @length24_eq_const( +; 
X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP8]] +; +; X64-AVX512F-256-LABEL: define i1 @length24_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP8]] +; +; X64-AVX512F-LABEL: define i1 @length24_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; 
X64-AVX512F-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-AVX512F-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP8]] +; +; X64-MIC-AVX2-LABEL: define i1 @length24_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP8]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length24_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP8]] +; +; X64-AVX-LABEL: length24_eq_const: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovq 
{{.*#+}} xmm1 = mem[0],zero +; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: setne %al +; X64-AVX-NEXT: retq +; X64-MIC-AVX-LABEL: length24_eq_const: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [959985462,858927408,0,0] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: setne %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length31(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length31( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], 
label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64: loadbb3: +; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length31( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ 
[[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 
[[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-SSE41: loadbb3: +; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length31( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; 
X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX1: loadbb3: +; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length31( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label 
[[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 
[[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX2: loadbb3: +; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length31( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; 
X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb3: +; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 
[[TMP26]], [[TMP27]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length31( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], 
[[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb3: +; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length31( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], 
[[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; 
X64-AVX512F-256: loadbb3: +; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-LABEL: define i32 @length31( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; 
X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb3: +; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define 
i32 @length31( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr 
[[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb3: +; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length31( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: 
[[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb3: +; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, 
ptr [[TMP23]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + +; X64-SSE2: res_block: + + + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: loadbb1: + + + + + + + + +; X64-SSE2: loadbb2: + + + + + + + + +; X64-SSE2: loadbb3: + + + + + + + + +; X64-SSE2: endblock: + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 31) nounwind + ret i32 %m +} + +define i1 @length31_eq(ptr %x, ptr %y) nounwind { +; +; X64-LABEL: define i1 @length31_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length31_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: 
[[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length31_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length31_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: 
[[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length31_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length31_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; 
X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length31_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length31_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], 
[[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length31_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length31_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], 
[[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-AVX-LABEL: length31_eq: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq +; X64-MIC-AVX-LABEL: length31_eq: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2 +; X64-MIC-AVX-NEXT: vmovdqu 15(%rsi), %xmm3 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: sete %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length31_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr 
[[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64: loadbb3: +; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[CMP:%.*]] = icmp 
slt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length31_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr 
[[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-SSE41: loadbb3: +; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length31_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = 
load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX1: loadbb3: +; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX1-NEXT: br i1 [[TMP28]], label 
[[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length31_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = 
getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX2: loadbb3: +; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length31_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] 
= select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb3: +; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512BW-256-NEXT: 
[[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length31_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: 
[[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb3: +; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret 
i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length31_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; 
X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb3: +; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length31_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 
[[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb3: +; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512F-NEXT: [[TMP24:%.*]] = 
load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length31_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; 
X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb3: +; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length31_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load 
i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb3: +; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + +; X64-SSE2: res_block: + + + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: loadbb1: + + + + + + + + +; X64-SSE2: loadbb2: + + + + + + + + +; X64-SSE2: loadbb3: + + + + + + + + +; X64-SSE2: endblock: + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length31_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] 
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64: loadbb3: +; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; 
X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length31_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 
@llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-SSE41: loadbb3: +; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length31_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; 
X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX1: loadbb3: +; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr 
[[X]], i64 23 +; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length31_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; 
X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX2: loadbb3: +; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length31_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; 
X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = 
load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb3: +; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length31_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: 
loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb3: +; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-NEXT: 
[[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length31_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; 
X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb3: +; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length31_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; 
X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb3: +; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length31_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, 
ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb3: +; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; 
X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length31_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 
@llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb3: +; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 23 +; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 23 +; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + +; X64-SSE2: res_block: + + + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: loadbb1: + + + + + + + + +; X64-SSE2: loadbb2: + + + + + + + + +; X64-SSE2: loadbb3: + + + + + + + + +; X64-SSE2: endblock: + + + 
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { +; +; X64-LABEL: define i1 @length31_eq_prefer128( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length31_eq_prefer128( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 
@length31_eq_prefer128( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length31_eq_prefer128( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length31_eq_prefer128( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 
+; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length31_eq_prefer128( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length31_eq_prefer128( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; 
X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length31_eq_prefer128( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length31_eq_prefer128( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load 
i128, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length31_eq_prefer128( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-AVX-LABEL: length31_eq_prefer128: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: 
vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq +; X64-MIC-AVX-LABEL: length31_eq_prefer128: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2 +; X64-MIC-AVX-NEXT: vmovdqu 15(%rsi), %xmm3 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: sete %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_eq_const(ptr %X) nounwind { +; +; X64-LABEL: define i1 @length31_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-NEXT: ret i1 [[TMP7]] +; +; X64-SSE41-LABEL: define i1 @length31_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-SSE41-NEXT: 
[[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP7]] +; +; X64-AVX1-LABEL: define i1 @length31_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-AVX1-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP7]] +; +; X64-AVX2-LABEL: define i1 @length31_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-AVX2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP7]] +; +; X64-AVX512BW-256-LABEL: define i1 @length31_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; 
X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP7]] +; +; X64-AVX512BW-LABEL: define i1 @length31_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP7]] +; +; X64-AVX512F-256-LABEL: define i1 @length31_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP7]] +; +; X64-AVX512F-LABEL: define i1 @length31_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; 
X64-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP7]] +; +; X64-MIC-AVX2-LABEL: define i1 @length31_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP7]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length31_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP7]] +; +; X64-AVX-LABEL: length31_eq_const: +; X64-AVX: 
# %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: setne %al +; X64-AVX-NEXT: retq +; X64-MIC-AVX-LABEL: length31_eq_const: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-MIC-AVX-NEXT: vmovdqu 15(%rdi), %xmm1 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [943142453,842084409,909456435,809056311] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: setne %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 31) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length32(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length32( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: 
[[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64: loadbb3: +; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-SSE41-LABEL: define i32 @length32( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; 
X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 
@llvm.bswap.i64(i64 [[TMP18]]) +; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-SSE41: loadbb3: +; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length32( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 
[[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX1: loadbb3: +; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length32( +; X64-AVX2-SAME: ptr 
[[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) 
+; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX2: loadbb3: +; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-256-LABEL: define i32 @length32( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: 
[[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb3: +; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; 
X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512BW-LABEL: define i32 @length32( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; 
X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb3: +; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-256-LABEL: define i32 @length32( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], 
[[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; 
X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb3: +; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX512F-LABEL: define i32 @length32( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; 
X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb3: +; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], 
[[RES_BLOCK]] ] +; X64-AVX512F-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX2-LABEL: define i32 @length32( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = 
getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb3: +; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: ret i32 [[PHI_RES]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length32( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: 
br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb3: +; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load 
i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: ret i32 [[PHI_RES]] +; + + + +; X64-SSE2: res_block: + + + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: loadbb1: + + + + + + + + +; X64-SSE2: loadbb2: + + + + + + + + +; X64-SSE2: loadbb3: + + + + + + + + +; X64-SSE2: endblock: + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind + ret i32 %m +} + +; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 + +define i1 @length32_eq(ptr %x, ptr %y) nounwind { +; +; X64-LABEL: define i1 @length32_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length32_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; 
X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length32_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length32_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length32_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = 
zext i1 [[TMP3]] to i32 +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length32_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length32_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length32_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length32_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 
[[TMP4]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length32_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-AVX512-LABEL: length32_eq: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: sete %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq +; X64-MIC-AVX-LABEL: length32_eq: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 +; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm1 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 +; X64-MIC-AVX-NEXT: kortestw %k0, %k0 +; X64-MIC-AVX-NEXT: sete %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length32_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; 
X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64: loadbb3: +; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: 
[[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length32_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: 
[[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-SSE41: loadbb3: +; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length32_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: 
loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX1: loadbb3: +; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; 
X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length32_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 
[[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX2: loadbb3: +; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length32_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; 
X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW-256: 
loadbb3: +; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length32_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; 
X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb3: +; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, 
[[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length32_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label 
[[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb3: +; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length32_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ 
[[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb3: +; X64-AVX512F-NEXT: [[TMP22:%.*]] = 
getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length32_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; 
X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb3: +; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; 
X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length32_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, 
ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb3: +; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + +; X64-SSE2: res_block: + + + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: loadbb1: + + + + + + + + +; X64-SSE2: loadbb2: + + + + + + + + +; X64-SSE2: loadbb3: + + + + + + + + +; X64-SSE2: endblock: + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length32_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi 
i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64: loadbb2: +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64: loadbb3: +; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 
24 +; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length32_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: br label [[LOADBB:%.*]] +; X64-SSE41: res_block: +; X64-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X64-SSE41: loadbb: +; X64-SSE41-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-SSE41-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-SSE41: loadbb1: +; X64-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i64, ptr 
[[TMP8]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-SSE41-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-SSE41: loadbb2: +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-SSE41-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-SSE41-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-SSE41-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-SSE41: loadbb3: +; X64-SSE41-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-SSE41-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-SSE41-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-SSE41-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-SSE41-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-SSE41-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-SSE41-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-SSE41-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-SSE41: endblock: +; X64-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length32_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] 
], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX1: loadbb2: +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX1-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX1-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: br i1 [[TMP21]], 
label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX1: loadbb3: +; X64-AVX1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX1-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX1-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX1-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length32_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: 
[[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX2: loadbb2: +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX2: loadbb3: +; X64-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX2-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length32_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW-256: res_block: +; X64-AVX512BW-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW-256: loadbb: +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW-256: loadbb1: +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb2: +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; 
X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW-256: loadbb3: +; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512BW-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW-256: endblock: +; X64-AVX512BW-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length32_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512BW: res_block: +; X64-AVX512BW-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512BW-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512BW-NEXT: 
[[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512BW-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512BW: loadbb: +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512BW-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512BW: loadbb1: +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512BW-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512BW-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb2: +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512BW-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512BW-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512BW: loadbb3: +; X64-AVX512BW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512BW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512BW-NEXT: [[TMP24:%.*]] = load i64, ptr 
[[TMP22]], align 1 +; X64-AVX512BW-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512BW-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512BW-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512BW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512BW-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512BW: endblock: +; X64-AVX512BW-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length32_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F-256: res_block: +; X64-AVX512F-256-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-256-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-256-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F-256: loadbb: +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-256-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-256-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F-256: loadbb1: +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = 
getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-256-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb2: +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-256-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-256-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F-256: loadbb3: +; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512F-256-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-256-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-256-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-256-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-256-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-256-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F-256: endblock: +; X64-AVX512F-256-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-256-NEXT: ret i1 
[[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length32_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-AVX512F: res_block: +; X64-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX512F: loadbb: +; X64-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX512F: loadbb1: +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb2: +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load 
i64, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-AVX512F: loadbb3: +; X64-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX512F: endblock: +; X64-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length32_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX2: res_block: +; X64-MIC-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX2: loadbb: +; 
X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX2: loadbb1: +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb2: +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX2-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX2: loadbb3: +; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-MIC-AVX2-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP26]] = call 
i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX2-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX2-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX2: endblock: +; X64-MIC-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length32_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: br label [[LOADBB:%.*]] +; X64-MIC-AVX512F: res_block: +; X64-MIC-AVX512F-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ], [ [[TMP19:%.*]], [[LOADBB2:%.*]] ], [ [[TMP26:%.*]], [[LOADBB3:%.*]] ] +; X64-MIC-AVX512F-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ], [ [[TMP20:%.*]], [[LOADBB2]] ], [ [[TMP27:%.*]], [[LOADBB3]] ] +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-MIC-AVX512F-NEXT: br label [[ENDBLOCK:%.*]] +; X64-MIC-AVX512F: loadbb: +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-MIC-AVX512F-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-MIC-AVX512F: loadbb1: +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; 
X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-MIC-AVX512F-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP14]], label [[LOADBB2]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb2: +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP16]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP19]] = call i64 @llvm.bswap.i64(i64 [[TMP17]]) +; X64-MIC-AVX512F-NEXT: [[TMP20]] = call i64 @llvm.bswap.i64(i64 [[TMP18]]) +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP21]], label [[LOADBB3]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: loadbb3: +; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[X]], i64 24 +; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[Y]], i64 24 +; X64-MIC-AVX512F-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP26]] = call i64 @llvm.bswap.i64(i64 [[TMP24]]) +; X64-MIC-AVX512F-NEXT: [[TMP27]] = call i64 @llvm.bswap.i64(i64 [[TMP25]]) +; X64-MIC-AVX512F-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP26]], [[TMP27]] +; X64-MIC-AVX512F-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-MIC-AVX512F: endblock: +; X64-MIC-AVX512F-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[PHI_RES]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + +; X64-SSE2: res_block: + + + + + +; X64-SSE2: loadbb: + + + + + + +; X64-SSE2: loadbb1: + + + 
+ + + + + +; X64-SSE2: loadbb2: + + + + + + + + +; X64-SSE2: loadbb3: + + + + + + + + +; X64-SSE2: endblock: + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { +; +; X64-LABEL: define i1 @length32_eq_prefer128( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length32_eq_prefer128( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-SSE41-NEXT: 
[[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length32_eq_prefer128( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length32_eq_prefer128( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length32_eq_prefer128( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR2]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length32_eq_prefer128( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length32_eq_prefer128( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; 
X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length32_eq_prefer128( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length32_eq_prefer128( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr 
[[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length32_eq_prefer128( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-AVX-LABEL: length32_eq_prefer128: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovdqu 16(%rdi), %xmm1 +; X64-AVX-NEXT: vpxor 16(%rsi), %xmm1, %xmm1 +; 
X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq +; X64-MIC-AVX-LABEL: length32_eq_prefer128: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-MIC-AVX-NEXT: vmovdqu 16(%rdi), %xmm1 +; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm2 +; X64-MIC-AVX-NEXT: vmovdqu 16(%rsi), %xmm3 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: sete %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_const(ptr %X) nounwind { +; +; X64-LABEL: define i1 @length32_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-NEXT: ret i1 [[TMP7]] +; +; X64-SSE41-LABEL: define i1 @length32_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-SSE41-NEXT: 
[[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP7]] +; +; X64-AVX1-LABEL: define i1 @length32_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP2]] +; +; X64-AVX2-LABEL: define i1 @length32_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512BW-256-LABEL: define i1 @length32_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512BW-LABEL: define i1 @length32_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512F-256-LABEL: define i1 @length32_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 
22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512F-LABEL: define i1 @length32_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP2]] +; +; X64-MIC-AVX2-LABEL: define i1 @length32_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP2]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length32_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP2]] +; +; X64-AVX512-LABEL: length32_eq_const: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: setne %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq +; X64-MIC-AVX-LABEL: length32_eq_const: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960] +; X64-MIC-AVX-NEXT: vpcmpneqd 
%zmm1, %zmm0, %k0 +; X64-MIC-AVX-NEXT: kortestw %k0, %k0 +; X64-MIC-AVX-NEXT: setne %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length48(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length48( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length48( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length48( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length48( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length48( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length48( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length48( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-AVX512F-256-NEXT: 
ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length48( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length48( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length48( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5:[0-9]+]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 48) nounwind + ret i32 %m +} + +define i1 @length48_eq(ptr %x, ptr %y) nounwind { +; +; X64-LABEL: define i1 @length48_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-NEXT: 
[[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length48_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-SSE41-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-SSE41-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length48_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 
1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length48_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length48_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] 
= load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]] +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]] +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length48_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]] +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length48_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: 
[[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]] +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]] +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length48_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]] +; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length48_eq( +; X64-MIC-AVX2-SAME: ptr 
[[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]] +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]] +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length48_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i128 [[TMP6]] to i256 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i128 [[TMP7]] to i256 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i256 [[TMP8]], [[TMP9]] +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i256 [[TMP3]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i256 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: 
[[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-AVX512-LABEL: length48_eq: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1 +; X64-AVX512-NEXT: vmovdqu 32(%rsi), %xmm2 +; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: sete %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq +; X64-MIC-AVX-LABEL: length48_eq: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 +; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm1 +; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm2 +; X64-MIC-AVX-NEXT: vmovdqu 32(%rsi), %xmm3 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm2, %k0 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: sete %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length48_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length48_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length48_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; 
X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length48_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length48_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length48_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length48_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length48_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length48_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; 
X64-MIC-AVX512F-LABEL: define i1 @length48_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length48_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length48_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length48_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length48_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length48_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; 
X64-AVX512BW-LABEL: define i1 @length48_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length48_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length48_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length48_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length48_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 48) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { +; +; X64-LABEL: define i1 @length48_eq_prefer128( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load 
i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length48_eq_prefer128( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; 
X64-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-SSE41-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-SSE41-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length48_eq_prefer128( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-AVX1-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-AVX1-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-AVX1-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-AVX1-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length48_eq_prefer128( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, 
ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-AVX2-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-AVX2-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-AVX2-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-AVX2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length48_eq_prefer128( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-AVX512BW-256-NEXT: 
[[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length48_eq_prefer128( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length48_eq_prefer128( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i128, 
ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length48_eq_prefer128( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr 
[[X]], i64 32 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-AVX512F-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-AVX512F-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length48_eq_prefer128( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 
+; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length48_eq_prefer128( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = or i128 [[TMP14]], [[TMP13]] +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-AVX-LABEL: length48_eq_prefer128: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovdqu 16(%rdi), %xmm1 +; X64-AVX-NEXT: vmovdqu 32(%rdi), %xmm2 +; X64-AVX-NEXT: vpxor 16(%rsi), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpxor 32(%rsi), %xmm2, %xmm1 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq +; 
X64-MIC-AVX-LABEL: length48_eq_prefer128: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-MIC-AVX-NEXT: vmovdqu 16(%rdi), %xmm1 +; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm2 +; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %xmm3 +; X64-MIC-AVX-NEXT: vmovdqu 16(%rsi), %xmm4 +; X64-MIC-AVX-NEXT: vmovdqu 32(%rsi), %xmm5 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm4, %zmm1, %k0 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm0, %k1 +; X64-MIC-AVX-NEXT: korw %k0, %k1, %k0 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm5, %zmm2, %k1 +; X64-MIC-AVX-NEXT: kortestw %k1, %k0 +; X64-MIC-AVX-NEXT: sete %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_eq_const(ptr %X) nounwind { +; +; X64-LABEL: define i1 @length48_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690 +; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-NEXT: [[TMP10:%.*]] = or i128 [[TMP9]], [[TMP8]] +; X64-NEXT: [[TMP11:%.*]] = icmp ne i128 [[TMP10]], 0 +; X64-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; X64-NEXT: ret i1 [[TMP11]] +; +; X64-SSE41-LABEL: define i1 @length48_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-SSE41-NEXT: 
[[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690 +; X64-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-SSE41-NEXT: [[TMP10:%.*]] = or i128 [[TMP9]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP11:%.*]] = icmp ne i128 [[TMP10]], 0 +; X64-SSE41-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP11]] +; +; X64-AVX1-LABEL: define i1 @length48_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256 +; X64-AVX1-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690 +; X64-AVX1-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]] +; X64-AVX1-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP8]] +; +; X64-AVX2-LABEL: define i1 @length48_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256 +; 
X64-AVX2-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690 +; X64-AVX2-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]] +; X64-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP8]] +; +; X64-AVX512BW-256-LABEL: define i1 @length48_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]] +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP8]] +; +; X64-AVX512BW-LABEL: define i1 @length48_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]] +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX512BW-NEXT: 
ret i1 [[TMP8]] +; +; X64-AVX512F-256-LABEL: define i1 @length48_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]] +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP8]] +; +; X64-AVX512F-LABEL: define i1 @length48_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]] +; X64-AVX512F-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP8]] +; +; X64-MIC-AVX2-LABEL: define i1 @length48_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 
22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]] +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP8]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length48_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = zext i128 [[TMP4]] to i256 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = xor i256 [[TMP5]], 73389002901949112059321871464991568690 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = or i256 [[TMP2]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = icmp ne i256 [[TMP7]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP8]] +; +; X64-AVX512-LABEL: length48_eq_const: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vmovdqu 32(%rdi), %xmm1 +; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: setne %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq +; X64-MIC-AVX-LABEL: length48_eq_const: +; X64-MIC-AVX: # 
%bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 +; X64-MIC-AVX-NEXT: vmovdqu 32(%rdi), %xmm1 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [892613426,959985462,858927408,926299444,0,0,0,0] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: setne %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 48) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length63(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length63( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length63( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length63( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length63( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length63( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length63( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: 
[[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length63( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length63( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length63( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length63( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 63) nounwind + ret i32 %m +} + +define i1 @length63_eq(ptr %x, ptr %y) nounwind { +; +; X64-LABEL: define i1 @length63_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-NEXT: [[TMP11:%.*]] = load 
i128, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 47 +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 47 +; X64-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1 +; X64-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]] +; X64-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]] +; X64-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]] +; X64-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0 +; X64-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-NEXT: ret i1 [[TMP22]] +; +; X64-SSE41-LABEL: define i1 @length63_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 47 +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 47 +; X64-SSE41-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1 +; X64-SSE41-NEXT: [[TMP17:%.*]] = 
load i128, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]] +; X64-SSE41-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]] +; X64-SSE41-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0 +; X64-SSE41-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP22]] +; +; X64-AVX1-LABEL: define i1 @length63_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP10]] +; +; X64-AVX2-LABEL: define i1 @length63_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: 
[[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512BW-256-LABEL: define i1 @length63_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512BW-LABEL: define i1 @length63_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP10]] +; +; 
X64-AVX512F-256-LABEL: define i1 @length63_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512F-LABEL: define i1 @length63_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP10]] +; +; X64-MIC-AVX2-LABEL: define i1 @length63_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; 
X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP10]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length63_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 31 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512-LABEL: length63_eq: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1 +; X64-AVX512-NEXT: vpxor 31(%rsi), %ymm1, %ymm1 +; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: setne 
%al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq +; X64-MIC-AVX-LABEL: length63_eq: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 +; X64-MIC-AVX-NEXT: vmovdqu 31(%rdi), %ymm1 +; X64-MIC-AVX-NEXT: vmovdqu (%rsi), %ymm2 +; X64-MIC-AVX-NEXT: vmovdqu 31(%rsi), %ymm3 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm3, %zmm1, %k0 +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: setne %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length63_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length63_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length63_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length63_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length63_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = 
tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length63_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length63_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length63_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length63_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length63_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length63_gt( +; X64-SAME: ptr 
[[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length63_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length63_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length63_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length63_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length63_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length63_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512F-256-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length63_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length63_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length63_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 63) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_eq_const(ptr %X) nounwind { +; +; X64-LABEL: define i1 @length63_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 47 +; X64-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-NEXT: 
[[TMP11:%.*]] = xor i128 [[TMP10]], 66716800424378146251538984255488604215 +; X64-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]] +; X64-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]] +; X64-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0 +; X64-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length63_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 47 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 66716800424378146251538984255488604215 +; X64-SSE41-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-SSE41-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]] +; X64-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0 +; X64-SSE41-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length63_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 
22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649 +; X64-AVX1-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length63_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649 +; X64-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length63_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 
22702550761799267355187145649125784605216755694630776232256222584591002841649 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length63_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length63_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; 
X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length63_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-AVX512F-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512F-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length63_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length63_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; 
X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 31 +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 22702550761799267355187145649125784605216755694630776232256222584591002841649 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; +; X64-AVX512-LABEL: length63_eq_const: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1 +; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; X64-AVX512-NEXT: vptest %ymm0, %ymm0 +; X64-AVX512-NEXT: sete %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq +; X64-MIC-AVX-LABEL: length63_eq_const: +; X64-MIC-AVX: # %bb.0: +; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %ymm0 +; X64-MIC-AVX-NEXT: vmovdqu 31(%rdi), %ymm1 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [875770417,943142453,842084409,909456435,809056311,875770417,943142453,842084409] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0 +; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [858927408,926299444,825243960,892613426,959985462,858927408,926299444,825243960] +; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 +; X64-MIC-AVX-NEXT: kortestw %k0, %k1 +; X64-MIC-AVX-NEXT: sete %al +; X64-MIC-AVX-NEXT: vzeroupper +; X64-MIC-AVX-NEXT: retq + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 63) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define 
i32 @length64(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length64( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length64( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length64( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length64( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length64( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length64( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length64( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length64( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length64( 
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length64( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind + ret i32 %m +} + +define i1 @length64_eq(ptr %x, ptr %y) nounwind { +; +; X64-LABEL: define i1 @length64_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 48 +; X64-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1 +; X64-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1 +; X64-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]] +; X64-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]] +; X64-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]] +; X64-NEXT: [[TMP22:%.*]] 
= icmp ne i128 [[TMP21]], 0 +; X64-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-NEXT: ret i1 [[TMP22]] +; +; X64-SSE41-LABEL: define i1 @length64_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-SSE41-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-SSE41-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 1 +; X64-SSE41-NEXT: [[TMP13:%.*]] = xor i128 [[TMP11]], [[TMP12]] +; X64-SSE41-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; X64-SSE41-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 48 +; X64-SSE41-NEXT: [[TMP16:%.*]] = load i128, ptr [[TMP14]], align 1 +; X64-SSE41-NEXT: [[TMP17:%.*]] = load i128, ptr [[TMP15]], align 1 +; X64-SSE41-NEXT: [[TMP18:%.*]] = xor i128 [[TMP16]], [[TMP17]] +; X64-SSE41-NEXT: [[TMP19:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-SSE41-NEXT: [[TMP20:%.*]] = or i128 [[TMP13]], [[TMP18]] +; X64-SSE41-NEXT: [[TMP21:%.*]] = or i128 [[TMP19]], [[TMP20]] +; X64-SSE41-NEXT: [[TMP22:%.*]] = icmp ne i128 [[TMP21]], 0 +; X64-SSE41-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-SSE41-NEXT: ret i1 [[TMP22]] +; +; X64-AVX1-LABEL: define i1 @length64_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr 
[[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP10]] +; +; X64-AVX2-LABEL: define i1 @length64_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512BW-256-LABEL: define i1 @length64_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, 
ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512BW-LABEL: define i1 @length64_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512F-256-LABEL: define i1 @length64_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512F-LABEL: define i1 @length64_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], 
align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP3]] +; +; X64-MIC-AVX2-LABEL: define i1 @length64_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP10]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length64_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP3]] +; +; X64-AVX512-LABEL: length64_eq: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512-NEXT: vpcmpneqd (%rsi), %zmm0, %k0 +; X64-AVX512-NEXT: kortestw %k0, %k0 +; X64-AVX512-NEXT: setne %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length64_lt( +; X64-SAME: 
ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length64_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length64_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length64_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length64_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length64_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length64_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512F-256-NEXT: 
[[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length64_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length64_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length64_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length64_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length64_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length64_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX1-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length64_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length64_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length64_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length64_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length64_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length64_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 
@length64_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_eq_const(ptr %X) nounwind { +; +; X64-LABEL: define i1 @length64_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; X64-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 68051240286688436651889234231545575736 +; X64-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]] +; X64-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]] +; X64-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0 +; X64-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length64_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, 
ptr [[X]], i64 16 +; X64-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-SSE41-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP6]], align 1 +; X64-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP7]], 73389002901949112059321871464991568690 +; X64-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 48 +; X64-SSE41-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP9]], align 1 +; X64-SSE41-NEXT: [[TMP11:%.*]] = xor i128 [[TMP10]], 68051240286688436651889234231545575736 +; X64-SSE41-NEXT: [[TMP12:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-SSE41-NEXT: [[TMP13:%.*]] = or i128 [[TMP8]], [[TMP11]] +; X64-SSE41-NEXT: [[TMP14:%.*]] = or i128 [[TMP12]], [[TMP13]] +; X64-SSE41-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0 +; X64-SSE41-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length64_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX1-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length64_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr 
[[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length64_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length64_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 
[[TMP4]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length64_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length64_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length64_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; 
X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length64_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = icmp ne i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; +; X64-AVX512-LABEL: length64_eq_const: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512-NEXT: vpcmpneqd .L.str(%rip), %zmm0, %k0 +; X64-AVX512-NEXT: kortestw %k0, %k0 +; X64-AVX512-NEXT: sete %al +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length96(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length96( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length96( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length96( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length96( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length96( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length96( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length96( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length96( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length96( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length96( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 96) nounwind + ret i32 %m +} + +define i1 @length96_eq(ptr %x, ptr %y) nounwind { +; X64-SSE-LABEL: length96_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $96, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: 
popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length96_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length96_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length96_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX1-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX1-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]] +; X64-AVX1-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0 +; X64-AVX1-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP16]] +; +; X64-AVX2-LABEL: define i1 @length96_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr 
[[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]] +; X64-AVX2-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0 +; X64-AVX2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP16]] +; +; X64-AVX512BW-256-LABEL: define i1 @length96_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i256, 
ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]] +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP16]] +; +; X64-AVX512BW-LABEL: define i1 @length96_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = zext i256 [[TMP6]] to i512 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = zext i256 [[TMP7]] to i512 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = xor i512 [[TMP8]], [[TMP9]] +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = or i512 [[TMP3]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = icmp ne i512 [[TMP11]], 0 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP12]] +; +; X64-AVX512F-256-LABEL: define i1 @length96_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; 
X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]] +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP16]] +; +; X64-AVX512F-LABEL: define i1 @length96_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = zext i256 [[TMP6]] to i512 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = zext i256 [[TMP7]] to i512 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = xor i512 [[TMP8]], [[TMP9]] +; X64-AVX512F-NEXT: [[TMP11:%.*]] = or i512 [[TMP3]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i512 [[TMP11]], 0 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP12]] +; +; X64-MIC-AVX2-LABEL: define i1 @length96_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = or i256 [[TMP14]], [[TMP13]] +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = icmp ne i256 [[TMP15]], 0 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP16]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length96_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = zext i256 [[TMP6]] to i512 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = zext i256 [[TMP7]] to i512 +; 
X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = xor i512 [[TMP8]], [[TMP9]] +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = or i512 [[TMP3]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = icmp ne i512 [[TMP11]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP12]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length96_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length96_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length96_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length96_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length96_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 
@length96_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length96_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length96_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length96_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length96_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length96_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] 
+; +; X64-SSE41-LABEL: define i1 @length96_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length96_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length96_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length96_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length96_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length96_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length96_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: 
[[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length96_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length96_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 96) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_eq_const(ptr %X) nounwind { +; X64-SSE-LABEL: length96_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $96, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length96_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 96) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length96_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 96) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length96_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: 
[[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0 +; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length96_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0 +; X64-AVX2-NEXT: 
[[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length96_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length96_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = zext i256 [[TMP5]] to i512 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP6]], 
24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length96_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length96_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; 
X64-AVX512F-NEXT: [[TMP5:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = zext i256 [[TMP5]] to i512 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP6]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length96_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = or i256 [[TMP9]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = icmp ne i256 [[TMP10]], 0 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP12]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length96_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 
1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = zext i256 [[TMP5]] to i512 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP6]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 96) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length127(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length127( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length127( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length127( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length127( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length127( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 
127) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length127( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length127( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length127( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length127( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length127( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 127) nounwind + ret i32 %m +} + +define i1 @length127_eq(ptr %x, ptr %y) nounwind { +; X64-SSE-LABEL: length127_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $127, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length127_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; 
X64-SSE41-LABEL: define i1 @length127_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length127_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX1-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95 +; X64-AVX1-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-AVX1-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-AVX1-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-AVX1-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP22]] +; +; X64-AVX2-LABEL: define i1 @length127_eq( +; 
X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95 +; X64-AVX2-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-AVX2-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-AVX2-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-AVX2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP22]] +; +; X64-AVX512BW-256-LABEL: define i1 @length127_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], 
i64 32 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-AVX512BW-256-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP22]] +; +; X64-AVX512BW-LABEL: define i1 @length127_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 63 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; 
X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512F-256-LABEL: define i1 @length127_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-AVX512F-256-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = or i256 
[[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP22]] +; +; X64-AVX512F-LABEL: define i1 @length127_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 63 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP10]] +; +; X64-MIC-AVX2-LABEL: define i1 @length127_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = 
load i256, ptr [[TMP10]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 95 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-MIC-AVX2-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP22]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length127_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 63 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP10]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length127_lt( +; X64-SAME: ptr 
[[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length127_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length127_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length127_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length127_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length127_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length127_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; 
X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length127_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length127_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length127_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length127_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length127_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length127_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) 
#[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length127_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length127_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length127_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length127_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length127_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length127_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 
[[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length127_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 127) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_eq_const(ptr %X) nounwind { +; X64-SSE-LABEL: length127_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $127, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length127_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 127) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length127_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 127) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length127_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX1-NEXT: 
[[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677 +; X64-AVX1-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX1-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-AVX1-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-AVX1-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length127_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677 +; X64-AVX2-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; 
X64-AVX2-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-AVX2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length127_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: 
define i1 @length127_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 63), align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length127_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 
24518896988982801982081367250212210778372643504230047123819838724519570650677 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length127_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 63), align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length127_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; 
X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 95 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24518896988982801982081367250212210778372643504230047123819838724519570650677 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length127_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 63 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 63), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 127) nounwind + %c = icmp eq i32 
%m, 0 + ret i1 %c +} + +define i32 @length128(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length128( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length128( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length128( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length128( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length128( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length128( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length128( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length128( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] 
+; +; X64-MIC-AVX2-LABEL: define i32 @length128( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length128( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 128) nounwind + ret i32 %m +} + +define i1 @length128_eq(ptr %x, ptr %y) nounwind { +; X64-SSE-LABEL: length128_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $128, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length128_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length128_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length128_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: 
[[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX1-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-AVX1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96 +; X64-AVX1-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-AVX1-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-AVX1-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-AVX1-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-AVX1-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-AVX1-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-AVX1-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP22]] +; +; X64-AVX2-LABEL: define i1 @length128_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = load 
i256, ptr [[TMP10]], align 1 +; X64-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96 +; X64-AVX2-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-AVX2-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-AVX2-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-AVX2-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-AVX2-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-AVX2-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-AVX2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP22]] +; +; X64-AVX512BW-256-LABEL: define i1 @length128_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr 
[[Y]], i64 96 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-AVX512BW-256-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512BW-256-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-AVX512BW-256-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-AVX512BW-256-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-AVX512BW-256-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX512BW-256-NEXT: ret i1 [[TMP22]] +; +; X64-AVX512BW-LABEL: define i1 @length128_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP10]] +; +; X64-AVX512F-256-LABEL: define i1 @length128_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], 
i64 32 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-AVX512F-256-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-AVX512F-256-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-AVX512F-256-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX512F-256-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-AVX512F-256-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-AVX512F-256-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-AVX512F-256-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX512F-256-NEXT: ret i1 [[TMP22]] +; +; X64-AVX512F-LABEL: define i1 @length128_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], 
[[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP10]] +; +; X64-MIC-AVX2-LABEL: define i1 @length128_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = load i256, ptr [[TMP10]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = xor i256 [[TMP11]], [[TMP12]] +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 96 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = load i256, ptr [[TMP14]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP17:%.*]] = load i256, ptr [[TMP15]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP18:%.*]] = xor i256 [[TMP16]], [[TMP17]] +; X64-MIC-AVX2-NEXT: [[TMP19:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-MIC-AVX2-NEXT: [[TMP20:%.*]] = or i256 [[TMP13]], [[TMP18]] +; X64-MIC-AVX2-NEXT: [[TMP21:%.*]] = or i256 [[TMP19]], [[TMP20]] +; X64-MIC-AVX2-NEXT: [[TMP22:%.*]] = icmp ne i256 [[TMP21]], 0 +; X64-MIC-AVX2-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-MIC-AVX2-NEXT: ret i1 [[TMP22]] +; +; 
X64-MIC-AVX512F-LABEL: define i1 @length128_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = icmp ne i512 [[TMP9]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP10]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length128_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length128_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length128_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 
@length128_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length128_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length128_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length128_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length128_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length128_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length128_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length128_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length128_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length128_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length128_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length128_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length128_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length128_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length128_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length128_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length128_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 128) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_eq_const(ptr %X) nounwind { +; X64-SSE-LABEL: length128_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $128, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 
@length128_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 128) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length128_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 128) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length128_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934 +; X64-AVX1-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX1-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-AVX1-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-AVX1-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; 
X64-AVX2-LABEL: define i1 @length128_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934 +; X64-AVX2-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX2-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-AVX2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length128_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512BW-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512BW-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP5:%.*]] = xor i256 
[[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX512BW-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX512BW-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-AVX512BW-256-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512BW-256-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934 +; X64-AVX512BW-256-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512BW-256-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-AVX512BW-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-AVX512BW-256-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-AVX512BW-256-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length128_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-AVX512BW-NEXT: 
ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length128_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX512F-256-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX512F-256-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX512F-256-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX512F-256-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX512F-256-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-256-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-AVX512F-256-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-AVX512F-256-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-AVX512F-256-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-AVX512F-256-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934 +; X64-AVX512F-256-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX512F-256-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-AVX512F-256-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-AVX512F-256-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-AVX512F-256-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length128_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], 
i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length128_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-MIC-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-MIC-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-MIC-AVX2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP6]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP7]], 24064810364522754539996825585178935186817565138301605567169177049701086016820 +; X64-MIC-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 96 +; X64-MIC-AVX2-NEXT: [[TMP10:%.*]] = load i256, ptr [[TMP9]], align 1 +; X64-MIC-AVX2-NEXT: [[TMP11:%.*]] = xor i256 [[TMP10]], 24972983613442865430775334151281434151203991406697113551929636559217741018934 +; X64-MIC-AVX2-NEXT: [[TMP12:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-MIC-AVX2-NEXT: [[TMP13:%.*]] = or i256 [[TMP8]], [[TMP11]] +; X64-MIC-AVX2-NEXT: [[TMP14:%.*]] = or i256 [[TMP12]], [[TMP13]] +; X64-MIC-AVX2-NEXT: [[TMP15:%.*]] = icmp ne i256 [[TMP14]], 0 +; X64-MIC-AVX2-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i32 +; 
X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP16]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length128_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = icmp ne i512 [[TMP8]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP10]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 128) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length192(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length192( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length192( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length192( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length192( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] 
= tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length192( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length192( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length192( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length192( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length192( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length192( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 192) nounwind + ret i32 %m +} + +define i1 @length192_eq(ptr %x, ptr %y) nounwind { +; X64-SSE-LABEL: length192_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $192, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx 
+; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length192_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length192_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length192_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length192_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length192_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length192_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512BW-NEXT: 
[[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = or i512 [[TMP14]], [[TMP13]] +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = icmp ne i512 [[TMP15]], 0 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP16]] +; +; X64-AVX512F-256-LABEL: define i1 @length192_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length192_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-AVX512F-NEXT: 
[[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-AVX512F-NEXT: [[TMP14:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP15:%.*]] = or i512 [[TMP14]], [[TMP13]] +; X64-AVX512F-NEXT: [[TMP16:%.*]] = icmp ne i512 [[TMP15]], 0 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP16]] +; +; X64-MIC-AVX2-LABEL: define i1 @length192_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length192_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = or i512 [[TMP14]], 
[[TMP13]] +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = icmp ne i512 [[TMP15]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP16]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length192_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length192_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length192_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length192_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length192_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length192_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = 
tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length192_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length192_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length192_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length192_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length192_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length192_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length192_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length192_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length192_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length192_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length192_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length192_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; 
X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length192_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length192_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 192) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_eq_const(ptr %X) nounwind { +; X64-SSE-LABEL: length192_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $192, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length192_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length192_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length192_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; 
X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length192_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length192_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length192_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = or i512 [[TMP12]], [[TMP11]] +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = icmp ne i512 [[TMP13]], 0 +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length192_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) 
#[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length192_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP13:%.*]] = or i512 [[TMP12]], [[TMP11]] +; X64-AVX512F-NEXT: [[TMP14:%.*]] = icmp ne i512 [[TMP13]], 0 +; X64-AVX512F-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length192_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 192) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length192_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, 
ptr @.str, align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = or i512 [[TMP12]], [[TMP11]] +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = icmp ne i512 [[TMP13]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 192) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length255(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length255( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length255( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length255( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length255( +; 
X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length255( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length255( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length255( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length255( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length255( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length255( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 255) nounwind + ret i32 %m +} + +define i1 @length255_eq(ptr %x, ptr %y) nounwind { +; X64-SSE-LABEL: length255_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $255, %edx +; X64-SSE-NEXT: callq memcmp +; 
X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length255_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length255_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length255_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length255_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length255_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length255_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; 
X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 191 +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 191 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]] +; X64-AVX512BW-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]] +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0 +; X64-AVX512BW-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP22]] +; +; X64-AVX512F-256-LABEL: define i1 @length255_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length255_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], 
[[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-AVX512F-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 191 +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 191 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]] +; X64-AVX512F-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]] +; X64-AVX512F-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0 +; X64-AVX512F-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP22]] +; +; X64-MIC-AVX2-LABEL: define i1 @length255_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length255_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 
1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 191 +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 191 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]] +; X64-MIC-AVX512F-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]] +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP22]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length255_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 
[[CMP]] +; +; X64-SSE41-LABEL: define i1 @length255_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length255_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length255_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length255_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length255_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length255_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length255_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length255_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length255_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length255_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length255_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length255_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length255_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length255_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length255_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length255_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length255_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length255_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 255) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length255_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 
255) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_eq_const(ptr %X) nounwind { +; X64-SSE-LABEL: length255_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $255, %edx +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length255_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length255_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length255_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length255_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length255_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; 
X64-AVX512BW-LABEL: define i1 @length255_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 191 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1 +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 191), align 1 +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]] +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]] +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]] +; X64-AVX512BW-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0 +; X64-AVX512BW-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length255_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 
@length255_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 191 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1 +; X64-AVX512F-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 191), align 1 +; X64-AVX512F-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]] +; X64-AVX512F-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]] +; X64-AVX512F-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]] +; X64-AVX512F-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0 +; X64-AVX512F-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length255_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 255) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length255_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) 
#[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 191 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 191), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]] +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]] +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]] +; X64-MIC-AVX512F-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 255) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length256(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length256( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]] +; X64-NEXT: ret i32 
[[M]] +; +; X64-SSE41-LABEL: define i32 @length256( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length256( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length256( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length256( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length256( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length256( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length256( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length256( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: 
define i32 @length256( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 256) nounwind + ret i32 %m +} + +define i1 @length256_eq(ptr %x, ptr %y) nounwind { +; X64-SSE-LABEL: length256_eq: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $256, %edx # imm = 0x100 +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: setne %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length256_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length256_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length256_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length256_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length256_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], 
ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length256_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 192 +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 192 +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1 +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1 +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]] +; X64-AVX512BW-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512BW-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]] +; X64-AVX512BW-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]] +; X64-AVX512BW-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0 +; X64-AVX512BW-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX512BW-NEXT: ret i1 [[TMP22]] +; +; X64-AVX512F-256-LABEL: define i1 @length256_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length256_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-AVX512F-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 192 +; X64-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 192 +; X64-AVX512F-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1 +; X64-AVX512F-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1 +; X64-AVX512F-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]] +; X64-AVX512F-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-AVX512F-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]] +; X64-AVX512F-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]] +; X64-AVX512F-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0 +; X64-AVX512F-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-AVX512F-NEXT: ret i1 [[TMP22]] +; +; X64-MIC-AVX2-LABEL: define i1 @length256_eq( 
+; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length256_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr [[Y]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = load i512, ptr [[TMP5]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = xor i512 [[TMP6]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[Y]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = load i512, ptr [[TMP9]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = load i512, ptr [[TMP10]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = xor i512 [[TMP11]], [[TMP12]] +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i64 192 +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Y]], i64 192 +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = load i512, ptr [[TMP14]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = load i512, ptr [[TMP15]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = xor i512 [[TMP16]], [[TMP17]] +; X64-MIC-AVX512F-NEXT: [[TMP19:%.*]] = or i512 [[TMP3]], [[TMP8]] +; X64-MIC-AVX512F-NEXT: [[TMP20:%.*]] = or i512 [[TMP13]], [[TMP18]] +; X64-MIC-AVX512F-NEXT: [[TMP21:%.*]] = or i512 [[TMP19]], [[TMP20]] +; X64-MIC-AVX512F-NEXT: [[TMP22:%.*]] = icmp ne i512 [[TMP21]], 0 +; 
X64-MIC-AVX512F-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 +; X64-MIC-AVX512F-NEXT: ret i1 [[TMP22]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length256_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length256_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length256_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length256_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length256_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length256_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; 
X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length256_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length256_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length256_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length256_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length256_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length256_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr 
[[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length256_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length256_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length256_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length256_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length256_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length256_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret 
i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length256_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length256_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 256) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_eq_const(ptr %X) nounwind { +; X64-SSE-LABEL: length256_eq_const: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movl $.L.str, %esi +; X64-SSE-NEXT: movl $256, %edx # imm = 0x100 +; X64-SSE-NEXT: callq memcmp +; X64-SSE-NEXT: testl %eax, %eax +; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +; +; +; X64-LABEL: define i1 @length256_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length256_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length256_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 
@length256_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length256_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length256_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512BW-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512BW-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512BW-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512BW-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512BW-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-AVX512BW-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512BW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512BW-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-AVX512BW-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-AVX512BW-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-AVX512BW-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 192 +; X64-AVX512BW-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1 +; X64-AVX512BW-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 192), align 1 +; X64-AVX512BW-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]] +; X64-AVX512BW-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512BW-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]] +; X64-AVX512BW-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]] 
+; X64-AVX512BW-NEXT: [[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0 +; X64-AVX512BW-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32 +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length256_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length256_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-AVX512F-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 192 +; X64-AVX512F-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1 +; X64-AVX512F-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 192), align 1 +; X64-AVX512F-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]] +; X64-AVX512F-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-AVX512F-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]] +; X64-AVX512F-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]] +; X64-AVX512F-NEXT: [[TMP19:%.*]] = icmp ne 
i512 [[TMP18]], 0 +; X64-AVX512F-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32 +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length256_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 256) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length256_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[TMP1:%.*]] = load i512, ptr [[X]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP2:%.*]] = load i512, ptr @.str, align 1 +; X64-MIC-AVX512F-NEXT: [[TMP3:%.*]] = xor i512 [[TMP1]], [[TMP2]] +; X64-MIC-AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 64 +; X64-MIC-AVX512F-NEXT: [[TMP5:%.*]] = load i512, ptr [[TMP4]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP6:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 64), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP7:%.*]] = xor i512 [[TMP5]], [[TMP6]] +; X64-MIC-AVX512F-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 128 +; X64-MIC-AVX512F-NEXT: [[TMP9:%.*]] = load i512, ptr [[TMP8]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP10:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 128), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP11:%.*]] = xor i512 [[TMP9]], [[TMP10]] +; X64-MIC-AVX512F-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[X]], i64 192 +; X64-MIC-AVX512F-NEXT: [[TMP13:%.*]] = load i512, ptr [[TMP12]], align 1 +; X64-MIC-AVX512F-NEXT: [[TMP14:%.*]] = load i512, ptr getelementptr (i8, ptr @.str, i64 192), align 1 +; X64-MIC-AVX512F-NEXT: [[TMP15:%.*]] = xor i512 [[TMP13]], [[TMP14]] +; X64-MIC-AVX512F-NEXT: [[TMP16:%.*]] = or i512 [[TMP3]], [[TMP7]] +; X64-MIC-AVX512F-NEXT: [[TMP17:%.*]] = or i512 [[TMP11]], [[TMP15]] +; X64-MIC-AVX512F-NEXT: [[TMP18:%.*]] = or i512 [[TMP16]], [[TMP17]] +; X64-MIC-AVX512F-NEXT: 
[[TMP19:%.*]] = icmp ne i512 [[TMP18]], 0 +; X64-MIC-AVX512F-NEXT: [[TMP20:%.*]] = zext i1 [[TMP19]] to i32 +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[TMP20]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 256) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length384(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length384( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length384( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length384( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length384( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length384( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length384( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length384( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) 
#[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length384( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length384( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length384( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 384) nounwind + ret i32 %m +} + +define i1 @length384_eq(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length384_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length384_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length384_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length384_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) 
#[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length384_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length384_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length384_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length384_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length384_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length384_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; 
X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length384_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length384_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length384_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length384_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length384_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length384_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; 
X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length384_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length384_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length384_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length384_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length384_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length384_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-SSE41-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length384_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length384_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length384_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length384_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length384_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length384_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 
@length384_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length384_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 384) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_eq_const(ptr %X) nounwind { +; X64-LABEL: define i1 @length384_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length384_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length384_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length384_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length384_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; 
X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length384_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length384_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length384_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length384_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length384_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 384) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 384) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length511(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length511( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 
@memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length511( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length511( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length511( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length511( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length511( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length511( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length511( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length511( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) 
#[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length511( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 511) nounwind + ret i32 %m +} + +define i1 @length511_eq(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length511_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length511_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length511_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length511_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length511_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length511_eq( +; 
X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length511_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length511_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length511_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length511_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length511_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; 
X64-SSE41-LABEL: define i1 @length511_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length511_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length511_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length511_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length511_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length511_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length511_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: 
[[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length511_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length511_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length511_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length511_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length511_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length511_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: 
[[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length511_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length511_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length511_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length511_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length511_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length511_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 511) #[[ATTR5]] 
+; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_eq_const(ptr %X) nounwind { +; X64-LABEL: define i1 @length511_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @length511_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length511_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length511_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length511_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length511_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: 
define i1 @length511_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length511_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length511_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length511_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 511) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 511) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length512(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @length512( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @length512( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length512( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; 
X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length512( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @length512( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @length512( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @length512( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @length512( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @length512( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @length512( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 512) nounwind + ret i32 %m +} + +define i1 @length512_eq(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length512_eq( +; X64-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length512_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length512_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length512_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length512_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length512_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length512_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp 
ne i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length512_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length512_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length512_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_lt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length512_lt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length512_lt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length512_lt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp 
slt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length512_lt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length512_lt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 @length512_lt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length512_lt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length512_lt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length512_lt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 
@length512_lt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_gt(ptr %x, ptr %y) nounwind { +; X64-LABEL: define i1 @length512_gt( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-SSE41-LABEL: define i1 @length512_gt( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-SSE41-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length512_gt( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length512_gt( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-256-LABEL: define i1 @length512_gt( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512BW-LABEL: define i1 
@length512_gt( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512BW-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-256-LABEL: define i1 @length512_gt( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[CMP]] +; +; X64-AVX512F-LABEL: define i1 @length512_gt( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-AVX512F-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX2-LABEL: define i1 @length512_gt( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[CMP]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length512_gt( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 512) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[CMP]] +; + + + + + + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_eq_const(ptr %X) nounwind { +; X64-LABEL: define i1 @length512_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; 
X64-SSE41-LABEL: define i1 @length512_eq_const( +; X64-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length512_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length512_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @length512_eq_const( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @length512_eq_const( +; X64-AVX512BW-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @length512_eq_const( +; X64-AVX512F-256-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @length512_eq_const( +; X64-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 
+; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @length512_eq_const( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @length512_eq_const( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 512) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 512) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; This checks that we do not do stupid things with huge sizes. +define i32 @huge_length(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i32 @huge_length( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @huge_length( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @huge_length( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @huge_length( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @huge_length( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @huge_length( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @huge_length( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 @huge_length( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @huge_length( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @huge_length( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind + ret i32 %m +} + +define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind { +; X64-LABEL: define i1 @huge_length_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @huge_length_eq( +; 
X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @huge_length_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @huge_length_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @huge_length_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @huge_length_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @huge_length_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @huge_length_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: 
[[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @huge_length_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @huge_length_eq( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 9223372036854775807) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; This checks non-constant sizes. 
+define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) nounwind { +; X64-LABEL: define i32 @nonconst_length( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-SSE41-LABEL: define i32 @nonconst_length( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-SSE41-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @nonconst_length( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @nonconst_length( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX2-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-256-LABEL: define i32 @nonconst_length( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: ret i32 [[M]] +; +; X64-AVX512BW-LABEL: define i32 @nonconst_length( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX512BW-NEXT: ret i32 [[M]] +; +; X64-AVX512F-256-LABEL: define i32 @nonconst_length( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX512F-256-NEXT: ret i32 [[M]] +; +; X64-AVX512F-LABEL: define i32 
@nonconst_length( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX512F-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX2-LABEL: define i32 @nonconst_length( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: ret i32 [[M]] +; +; X64-MIC-AVX512F-LABEL: define i32 @nonconst_length( +; X64-MIC-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: ret i32 [[M]] +; + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind + ret i32 %m +} + +define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) nounwind { +; X64-LABEL: define i1 @nonconst_length_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-SSE41-LABEL: define i1 @nonconst_length_eq( +; X64-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-SSE41-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @nonconst_length_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @nonconst_length_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 
[[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-256-LABEL: define i1 @nonconst_length_eq( +; X64-AVX512BW-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX512BW-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-256-NEXT: ret i1 [[C]] +; +; X64-AVX512BW-LABEL: define i1 @nonconst_length_eq( +; X64-AVX512BW-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX512BW-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX512BW-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512BW-NEXT: ret i1 [[C]] +; +; X64-AVX512F-256-LABEL: define i1 @nonconst_length_eq( +; X64-AVX512F-256-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX512F-256-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX512F-256-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-256-NEXT: ret i1 [[C]] +; +; X64-AVX512F-LABEL: define i1 @nonconst_length_eq( +; X64-AVX512F-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX512F-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX2-LABEL: define i1 @nonconst_length_eq( +; X64-MIC-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-MIC-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX2-NEXT: ret i1 [[C]] +; +; X64-MIC-AVX512F-LABEL: define i1 @nonconst_length_eq( +; X64-MIC-AVX512F-SAME: ptr 
[[X:%.*]], ptr [[Y:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { +; X64-MIC-AVX512F-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR5]] +; X64-MIC-AVX512F-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-MIC-AVX512F-NEXT: ret i1 [[C]] +; + + + + + + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-nobuiltin.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-nobuiltin.ll new file mode 100644 index 0000000000000..1ad91adb9e533 --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-nobuiltin.ll @@ -0,0 +1,248 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=expand-memcmp -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=X64_1LD +; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=X64_2LD + + +declare signext i32 @memcmp(ptr %src1, ptr %src2, i64 %size) + +; Zero-length comparisons should be optimized away. 
+define i32 @f1(ptr %src1, ptr %src2) { +; X64-LABEL: define i32 @f1( +; X64-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) { +; X64-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 0) #[[ATTR0:[0-9]+]] +; X64-NEXT: ret i32 [[RES]] +; +; X64_1LD-LABEL: define i32 @f1( +; X64_1LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) { +; X64_1LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 0) #[[ATTR0:[0-9]+]] +; X64_1LD-NEXT: ret i32 [[RES]] +; +; X64_2LD-LABEL: define i32 @f1( +; X64_2LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) { +; X64_2LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 0) #[[ATTR0:[0-9]+]] +; X64_2LD-NEXT: ret i32 [[RES]] +; + %res = call i32 @memcmp(ptr %src1, ptr %src2, i64 0) nobuiltin + ret i32 %res +} + +; Check a case where the result is used as an integer. +define i32 @f2(ptr %src1, ptr %src2) { +; X64-LABEL: define i32 @f2( +; X64-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) { +; X64-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 2) #[[ATTR0]] +; X64-NEXT: ret i32 [[RES]] +; +; X64_1LD-LABEL: define i32 @f2( +; X64_1LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) { +; X64_1LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 2) #[[ATTR0]] +; X64_1LD-NEXT: ret i32 [[RES]] +; +; X64_2LD-LABEL: define i32 @f2( +; X64_2LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) { +; X64_2LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 2) #[[ATTR0]] +; X64_2LD-NEXT: ret i32 [[RES]] +; + %res = call i32 @memcmp(ptr %src1, ptr %src2, i64 2) nobuiltin + ret i32 %res +} + +; Check a case where the result is tested for equality. 
+define void @f3(ptr %src1, ptr %src2, ptr %dest) { +; X64-LABEL: define void @f3( +; X64-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) { +; X64-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 3) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 0 +; X64-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]] +; X64: store: +; X64-NEXT: store i32 0, ptr [[DEST]], align 4 +; X64-NEXT: br label [[EXIT]] +; X64: exit: +; X64-NEXT: ret void +; +; X64_1LD-LABEL: define void @f3( +; X64_1LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) { +; X64_1LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 3) #[[ATTR0]] +; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 0 +; X64_1LD-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]] +; X64_1LD: store: +; X64_1LD-NEXT: store i32 0, ptr [[DEST]], align 4 +; X64_1LD-NEXT: br label [[EXIT]] +; X64_1LD: exit: +; X64_1LD-NEXT: ret void +; +; X64_2LD-LABEL: define void @f3( +; X64_2LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) { +; X64_2LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 3) #[[ATTR0]] +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 0 +; X64_2LD-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]] +; X64_2LD: store: +; X64_2LD-NEXT: store i32 0, ptr [[DEST]], align 4 +; X64_2LD-NEXT: br label [[EXIT]] +; X64_2LD: exit: +; X64_2LD-NEXT: ret void +; + %res = call i32 @memcmp(ptr %src1, ptr %src2, i64 3) nobuiltin + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, ptr %dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested for inequality. 
+define void @f4(ptr %src1, ptr %src2, ptr %dest) { +; X64-LABEL: define void @f4( +; X64-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) { +; X64-NEXT: entry: +; X64-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 4) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[RES]], 0 +; X64-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]] +; X64: store: +; X64-NEXT: store i32 0, ptr [[DEST]], align 4 +; X64-NEXT: br label [[EXIT]] +; X64: exit: +; X64-NEXT: ret void +; +; X64_1LD-LABEL: define void @f4( +; X64_1LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) { +; X64_1LD-NEXT: entry: +; X64_1LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 4) #[[ATTR0]] +; X64_1LD-NEXT: [[CMP:%.*]] = icmp ne i32 [[RES]], 0 +; X64_1LD-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]] +; X64_1LD: store: +; X64_1LD-NEXT: store i32 0, ptr [[DEST]], align 4 +; X64_1LD-NEXT: br label [[EXIT]] +; X64_1LD: exit: +; X64_1LD-NEXT: ret void +; +; X64_2LD-LABEL: define void @f4( +; X64_2LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) { +; X64_2LD-NEXT: entry: +; X64_2LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 4) #[[ATTR0]] +; X64_2LD-NEXT: [[CMP:%.*]] = icmp ne i32 [[RES]], 0 +; X64_2LD-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]] +; X64_2LD: store: +; X64_2LD-NEXT: store i32 0, ptr [[DEST]], align 4 +; X64_2LD-NEXT: br label [[EXIT]] +; X64_2LD: exit: +; X64_2LD-NEXT: ret void +; +entry: + %res = call i32 @memcmp(ptr %src1, ptr %src2, i64 4) nobuiltin + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, ptr %dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested via slt. 
+define void @f5(ptr %src1, ptr %src2, ptr %dest) { +; X64-LABEL: define void @f5( +; X64-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) { +; X64-NEXT: entry: +; X64-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 5) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp slt i32 [[RES]], 0 +; X64-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]] +; X64: store: +; X64-NEXT: store i32 0, ptr [[DEST]], align 4 +; X64-NEXT: br label [[EXIT]] +; X64: exit: +; X64-NEXT: ret void +; +; X64_1LD-LABEL: define void @f5( +; X64_1LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) { +; X64_1LD-NEXT: entry: +; X64_1LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 5) #[[ATTR0]] +; X64_1LD-NEXT: [[CMP:%.*]] = icmp slt i32 [[RES]], 0 +; X64_1LD-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]] +; X64_1LD: store: +; X64_1LD-NEXT: store i32 0, ptr [[DEST]], align 4 +; X64_1LD-NEXT: br label [[EXIT]] +; X64_1LD: exit: +; X64_1LD-NEXT: ret void +; +; X64_2LD-LABEL: define void @f5( +; X64_2LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) { +; X64_2LD-NEXT: entry: +; X64_2LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 5) #[[ATTR0]] +; X64_2LD-NEXT: [[CMP:%.*]] = icmp slt i32 [[RES]], 0 +; X64_2LD-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]] +; X64_2LD: store: +; X64_2LD-NEXT: store i32 0, ptr [[DEST]], align 4 +; X64_2LD-NEXT: br label [[EXIT]] +; X64_2LD: exit: +; X64_2LD-NEXT: ret void +; +entry: + %res = call i32 @memcmp(ptr %src1, ptr %src2, i64 5) nobuiltin + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, ptr %dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested for sgt. 
+define void @f6(ptr %src1, ptr %src2, ptr %dest) { +; X64-LABEL: define void @f6( +; X64-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) { +; X64-NEXT: entry: +; X64-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 6) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[RES]], 0 +; X64-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]] +; X64: store: +; X64-NEXT: store i32 0, ptr [[DEST]], align 4 +; X64-NEXT: br label [[EXIT]] +; X64: exit: +; X64-NEXT: ret void +; +; X64_1LD-LABEL: define void @f6( +; X64_1LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) { +; X64_1LD-NEXT: entry: +; X64_1LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 6) #[[ATTR0]] +; X64_1LD-NEXT: [[CMP:%.*]] = icmp sgt i32 [[RES]], 0 +; X64_1LD-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]] +; X64_1LD: store: +; X64_1LD-NEXT: store i32 0, ptr [[DEST]], align 4 +; X64_1LD-NEXT: br label [[EXIT]] +; X64_1LD: exit: +; X64_1LD-NEXT: ret void +; +; X64_2LD-LABEL: define void @f6( +; X64_2LD-SAME: ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[DEST:%.*]]) { +; X64_2LD-NEXT: entry: +; X64_2LD-NEXT: [[RES:%.*]] = call i32 @memcmp(ptr [[SRC1]], ptr [[SRC2]], i64 6) #[[ATTR0]] +; X64_2LD-NEXT: [[CMP:%.*]] = icmp sgt i32 [[RES]], 0 +; X64_2LD-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[STORE:%.*]] +; X64_2LD: store: +; X64_2LD-NEXT: store i32 0, ptr [[DEST]], align 4 +; X64_2LD-NEXT: br label [[EXIT]] +; X64_2LD: exit: +; X64_2LD-NEXT: ret void +; +entry: + %res = call i32 @memcmp(ptr %src1, ptr %src2, i64 6) nobuiltin + %cmp = icmp sgt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, ptr %dest + br label %exit + +exit: + ret void +} diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-optsize-x32.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-optsize-x32.ll new file mode 100644 index 0000000000000..b36c0db432820 --- /dev/null +++ 
b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-optsize-x32.ll @@ -0,0 +1,870 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=cmov < %s | FileCheck %s --check-prefix=X86 +; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=X86-SSE2 + +; This tests codegen time inlining/optimization of memcmp +; rdar://6480398 + +@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1 + +declare dso_local i32 @memcmp(ptr, ptr, i32) +declare dso_local i32 @bcmp(ptr, ptr, i32) + +define i32 @length2(ptr %X, ptr %Y) nounwind optsize { +; X86-LABEL: define i32 @length2( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-NEXT: ret i32 [[TMP7]] +; +; X86-SSE2-LABEL: define i32 @length2( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: ret i32 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind + ret i32 %m +} + +define i1 
@length2_eq(ptr %X, ptr %Y) nounwind optsize { +; X86-LABEL: define i1 @length2_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length2_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_const(ptr %X) nounwind optsize { +; X86-LABEL: define i1 @length2_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X86-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-NEXT: ret i1 [[TMP2]] +; +; X86-SSE2-LABEL: define i1 @length2_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP2]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind optsize { +; X86-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = 
tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR3:[0-9]+]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR3:[0-9]+]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length3(ptr %X, ptr %Y) nounwind optsize { +; X86-LABEL: define i32 @length3( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br label [[ENDBLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE2-LABEL: define i32 @length3( +; X86-SSE2-SAME: ptr 
[[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br label [[ENDBLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind + ret i32 %m +} + +define i1 @length3_eq(ptr %X, ptr %Y) nounwind optsize { +; X86-LABEL: define i1 @length3_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = 
load i8, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X86-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X86-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X86-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X86-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X86-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-NEXT: ret i1 [[TMP12]] +; +; X86-SSE2-LABEL: define i1 @length3_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X86-SSE2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X86-SSE2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X86-SSE2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X86-SSE2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length4(ptr %X, ptr %Y) nounwind optsize { +; X86-LABEL: define i32 @length4( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to 
i32 +; X86-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X86-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X86-NEXT: ret i32 [[TMP9]] +; +; X86-SSE2-LABEL: define i32 @length4( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-SSE2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X86-SSE2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X86-SSE2-NEXT: ret i32 [[TMP9]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind + ret i32 %m +} + +define i1 @length4_eq(ptr %X, ptr %Y) nounwind optsize { +; X86-LABEL: define i1 @length4_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-NEXT: ret i1 [[TMP3]] +; +; X86-SSE2-LABEL: define i1 @length4_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP3]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length4_eq_const(ptr %X) nounwind optsize { +; X86-LABEL: define i1 @length4_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = 
load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X86-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length4_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length5(ptr %X, ptr %Y) nounwind optsize { +; X86-LABEL: define i32 @length5( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br label [[ENDBLOCK]] +; X86: endblock: +; 
X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE2-LABEL: define i32 @length5( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br label [[ENDBLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind + ret i32 %m +} + +define i1 @length5_eq(ptr %X, ptr %Y) nounwind optsize { +; X86-LABEL: define i1 @length5_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: 
[[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X86-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X86-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X86-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X86-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X86-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-NEXT: ret i1 [[TMP12]] +; +; X86-SSE2-LABEL: define i1 @length5_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X86-SSE2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X86-SSE2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X86-SSE2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X86-SSE2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length8(ptr %X, ptr %Y) nounwind optsize { +; X86-LABEL: define i32 @length8( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], 
[[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE2-LABEL: define i32 @length8( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: 
[[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind + ret i32 %m +} + +define i1 @length8_eq(ptr %X, ptr %Y) nounwind optsize { +; X86-LABEL: define i1 @length8_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length8_eq( +; X86-SSE2-SAME: ptr 
[[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length8_eq_const(ptr %X) nounwind optsize { +; X86-LABEL: define i1 @length8_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408 +; X86-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1 +; X86-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444 +; X86-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]] +; X86-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +; X86-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-NEXT: ret i1 [[TMP7]] +; +; X86-SSE2-LABEL: define i1 @length8_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408 +; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1 +; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444 +; X86-SSE2-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], 
[[TMP5]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length12_eq(ptr %X, ptr %Y) nounwind optsize { +; X86-LABEL: define i1 @length12_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR4:[0-9]+]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length12_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR4:[0-9]+]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length12(ptr %X, ptr %Y) nounwind optsize { +; X86-LABEL: define i32 @length12( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR4]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length12( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR4]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind + ret i32 %m +} + +; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 + +define i32 @length16(ptr %X, ptr %Y) nounwind optsize { +; X86-LABEL: define i32 @length16( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR4]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length16( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR4]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind + ret i32 %m +} + +define i1 @length16_eq(ptr %x, ptr %y) nounwind optsize { +; X86-NOSSE-LABEL: length16_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $16 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length16_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR4]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length16_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP3]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_eq_const(ptr %X) nounwind optsize { +; X86-NOSSE-LABEL: length16_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $16 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length16_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 16) #[[ATTR4]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; 
X86-SSE2-LABEL: define i1 @length16_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 + +define i32 @length24(ptr %X, ptr %Y) nounwind optsize { +; X86-LABEL: define i32 @length24( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR4]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length24( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR4]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind + ret i32 %m +} + +define i1 @length24_eq(ptr %x, ptr %y) nounwind optsize { +; X86-NOSSE-LABEL: length24_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $24 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length24_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR4]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length24_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load 
i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_eq_const(ptr %X) nounwind optsize { +; X86-NOSSE-LABEL: length24_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $24 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length24_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 24) #[[ATTR4]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length24_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 68051240286688436651889234231545575736 +; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = 
icmp ne i128 [[TMP6]], 0 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length32(ptr %X, ptr %Y) nounwind optsize { +; X86-LABEL: define i32 @length32( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR4]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length32( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR4]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind + ret i32 %m +} + +; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 + +define i1 @length32_eq(ptr %x, ptr %y) nounwind optsize { +; X86-NOSSE-LABEL: length32_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $32 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length32_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR4]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length32_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr 
[[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_const(ptr %X) nounwind optsize { +; X86-NOSSE-LABEL: length32_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $32 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length32_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 32) #[[ATTR4]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length32_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length64(ptr %X, ptr %Y) nounwind 
optsize { +; X86-LABEL: define i32 @length64( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR4]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length64( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR4]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind + ret i32 %m +} + +define i1 @length64_eq(ptr %x, ptr %y) nounwind optsize { +; X86-LABEL: define i1 @length64_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR4]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length64_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR4]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_eq_const(ptr %X) nounwind optsize { +; X86-LABEL: define i1 @length64_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR4]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length64_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR4]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind 
optsize { +; X86-LABEL: define i32 @bcmp_length2( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-NEXT: ret i32 [[TMP4]] +; +; X86-SSE2-LABEL: define i32 @bcmp_length2( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE2-NEXT: ret i32 [[TMP4]] +; + %m = tail call i32 @bcmp(ptr %X, ptr %Y, i32 2) nounwind + ret i32 %m +} diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-optsize.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-optsize.ll new file mode 100644 index 0000000000000..cb6c5e6da1c79 --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-optsize.ll @@ -0,0 +1,1414 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=X64 +; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefix=X64-AVX1 +; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx2 < %s | FileCheck %s --check-prefix=X64-AVX2 + +; This tests codegen time inlining/optimization of memcmp +; rdar://6480398 + +@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1 + +declare dso_local i32 @memcmp(ptr, ptr, i64) +declare dso_local i32 @bcmp(ptr, ptr, i64) + +define i32 @length2(ptr %X, ptr %Y) nounwind optsize { +; X64-LABEL: define i32 @length2( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR0:[0-9]+]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-NEXT: ret i32 [[TMP7]] +; +; X64-AVX1-LABEL: define i32 @length2( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: ret i32 [[TMP7]] +; +; X64-AVX2-LABEL: define i32 @length2( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: ret i32 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + ret i32 %m +} + +define i1 @length2_eq(ptr %X, ptr %Y) nounwind optsize { +; X64-LABEL: define i1 @length2_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: 
[[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length2_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length2_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_const(ptr %X) nounwind optsize { +; X64-LABEL: define i1 @length2_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-NEXT: ret i1 [[TMP2]] +; +; X64-AVX1-LABEL: define i1 @length2_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP2]] +; +; X64-AVX2-LABEL: define i1 @length2_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i16 
[[TMP1]], 12849 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP2]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind optsize { +; X64-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR2:[0-9]+]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length3(ptr %X, ptr %Y) nounwind optsize { +; X64-LABEL: define i32 @length3( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp 
eq i16 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-NEXT: br label [[ENDBLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length3( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br label 
[[ENDBLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length3( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br label [[ENDBLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind + ret i32 %m +} + +define i1 @length3_eq(ptr %X, ptr %Y) nounwind optsize { +; X64-LABEL: define i1 @length3_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = 
xor i16 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: ret i1 [[TMP12]] +; +; X64-AVX1-LABEL: define i1 @length3_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP12]] +; +; X64-AVX2-LABEL: define i1 @length3_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; 
X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length4(ptr %X, ptr %Y) nounwind optsize { +; X64-LABEL: define i32 @length4( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-NEXT: ret i32 [[TMP9]] +; +; X64-AVX1-LABEL: define i32 @length4( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] 
+; X64-AVX1-NEXT: ret i32 [[TMP9]] +; +; X64-AVX2-LABEL: define i32 @length4( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX2-NEXT: ret i32 [[TMP9]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + ret i32 %m +} + +define i1 @length4_eq(ptr %X, ptr %Y) nounwind optsize { +; X64-LABEL: define i1 @length4_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: ret i1 [[TMP3]] +; +; X64-AVX1-LABEL: define i1 @length4_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP3]] +; +; X64-AVX2-LABEL: define i1 @length4_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: ret i1 
[[TMP3]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length4_eq_const(ptr %X) nounwind optsize { +; X64-LABEL: define i1 @length4_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length4_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length4_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length5(ptr %X, ptr %Y) nounwind optsize { +; X64-LABEL: define i32 @length5( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i32 
@llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-NEXT: br label [[ENDBLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length5( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX1-NEXT: [[TMP14:%.*]] = sub 
i32 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br label [[ENDBLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length5( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br label [[ENDBLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + ret i32 %m +} + +define i1 @length5_eq(ptr %X, ptr %Y) nounwind optsize { +; X64-LABEL: define i1 @length5_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: ret i1 [[TMP12]] +; +; X64-AVX1-LABEL: define i1 @length5_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP12]] +; +; X64-AVX2-LABEL: define i1 @length5_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX2-NEXT: 
[[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length8(ptr %X, ptr %Y) nounwind optsize { +; X64-LABEL: define i32 @length8( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-NEXT: ret i32 [[TMP9]] +; +; X64-AVX1-LABEL: define i32 @length8( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; 
X64-AVX1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX1-NEXT: ret i32 [[TMP9]] +; +; X64-AVX2-LABEL: define i32 @length8( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX2-NEXT: ret i32 [[TMP9]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind + ret i32 %m +} + +define i1 @length8_eq(ptr %X, ptr %Y) nounwind optsize { +; X64-LABEL: define i1 @length8_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length8_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length8_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 
1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length8_eq_const(ptr %X) nounwind optsize { +; X64-LABEL: define i1 @length8_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-NEXT: ret i1 [[TMP2]] +; +; X64-AVX1-LABEL: define i1 @length8_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP2]] +; +; X64-AVX2-LABEL: define i1 @length8_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP2]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length12_eq(ptr %X, ptr %Y) nounwind optsize { +; X64-LABEL: define i1 @length12_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; 
X64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: ret i1 [[TMP12]] +; +; X64-AVX1-LABEL: define i1 @length12_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP12]] +; +; X64-AVX2-LABEL: define i1 @length12_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-AVX2-NEXT: 
[[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length12(ptr %X, ptr %Y) nounwind optsize { +; X64-LABEL: define i32 @length12( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: 
[[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length12( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-AVX1-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-AVX1-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-AVX1-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length12( +; 
X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-AVX2-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-AVX2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-AVX2-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind + ret i32 %m +} + +; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 + +define i32 @length16(ptr %X, ptr %Y) nounwind optsize { +; 
X64-LABEL: define i32 @length16( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length16( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp 
ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length16( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr 
[[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind + ret i32 %m +} + +define i1 @length16_eq(ptr %x, ptr %y) nounwind optsize { +; X64-SSE2-LABEL: length16_eq: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE2-NEXT: movdqu (%rsi), %xmm1 +; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 +; X64-SSE2-NEXT: pmovmskb %xmm1, %eax +; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X64-SSE2-NEXT: setne %al +; X64-SSE2-NEXT: retq +; +; X64-AVX-LABEL: length16_eq: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: setne %al +; X64-AVX-NEXT: retq +; X64-LABEL: define i1 @length16_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext 
i1 [[TMP3]] to i32 +; X64-NEXT: ret i1 [[TMP3]] +; +; X64-AVX1-LABEL: define i1 @length16_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP3]] +; +; X64-AVX2-LABEL: define i1 @length16_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP3]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_eq_const(ptr %X) nounwind optsize { +; X64-SSE2-LABEL: length16_eq_const: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE2-NEXT: pmovmskb %xmm0, %eax +; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X64-SSE2-NEXT: sete %al +; X64-SSE2-NEXT: retq +; +; X64-AVX-LABEL: length16_eq_const: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq +; X64-LABEL: define i1 @length16_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length16_eq_const( +; X64-AVX1-SAME: ptr 
[[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length16_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 + +define i32 @length24(ptr %X, ptr %Y) nounwind optsize { +; X64-LABEL: define i32 @length24( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR3:[0-9]+]] +; X64-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length24( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR4:[0-9]+]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length24( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR4:[0-9]+]] +; X64-AVX2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind + ret i32 %m +} + +define i1 @length24_eq(ptr %x, ptr %y) nounwind optsize { +; X64-SSE2-LABEL: length24_eq: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE2-NEXT: movdqu (%rsi), %xmm1 +; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 +; X64-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 
+; X64-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero +; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 +; X64-SSE2-NEXT: pand %xmm1, %xmm2 +; X64-SSE2-NEXT: pmovmskb %xmm2, %eax +; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X64-SSE2-NEXT: sete %al +; X64-SSE2-NEXT: retq +; +; X64-AVX-LABEL: length24_eq: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq +; X64-LABEL: define i1 @length24_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length24_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP5:%.*]] = 
getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length24_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_eq_const(ptr %X) nounwind optsize { +; X64-SSE2-LABEL: length24_eq_const: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; X64-SSE2-NEXT: pcmpeqb 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE2-NEXT: pand %xmm1, %xmm0 +; X64-SSE2-NEXT: pmovmskb %xmm0, %eax +; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X64-SSE2-NEXT: setne %al +; X64-SSE2-NEXT: retq +; +; X64-AVX-LABEL: length24_eq_const: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: setne %al +; X64-AVX-NEXT: retq +; X64-LABEL: define i1 @length24_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-NEXT: ret i1 [[TMP8]] +; +; X64-AVX1-LABEL: define i1 @length24_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-AVX1-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-AVX1-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-AVX1-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; 
X64-AVX1-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP8]] +; +; X64-AVX2-LABEL: define i1 @length24_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-AVX2-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-AVX2-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP8]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length32(ptr %X, ptr %Y) nounwind optsize { +; X64-LABEL: define i32 @length32( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR3]] +; X64-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length32( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR4]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length32( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR4]] +; X64-AVX2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind + ret i32 %m +} + +; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 + +define i1 @length32_eq(ptr %x, ptr %y) nounwind optsize { +; X64-SSE2-LABEL: length32_eq: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 +; 
X64-SSE2-NEXT: movdqu (%rsi), %xmm2 +; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2 +; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0 +; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 +; X64-SSE2-NEXT: pand %xmm2, %xmm0 +; X64-SSE2-NEXT: pmovmskb %xmm0, %eax +; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X64-SSE2-NEXT: sete %al +; X64-SSE2-NEXT: retq +; +; X64-LABEL: define i1 @length32_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length32_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length32_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 
[[TMP4]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_const(ptr %X) nounwind optsize { +; X64-SSE2-LABEL: length32_eq_const: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: movdqu (%rdi), %xmm0 +; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1 +; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE2-NEXT: pand %xmm1, %xmm0 +; X64-SSE2-NEXT: pmovmskb %xmm0, %eax +; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X64-SSE2-NEXT: setne %al +; X64-SSE2-NEXT: retq +; +; X64-LABEL: define i1 @length32_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-NEXT: ret i1 [[TMP7]] +; +; X64-AVX1-LABEL: define i1 @length32_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP2]] +; +; X64-AVX2-LABEL: define i1 @length32_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: 
[[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP2]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length64(ptr %X, ptr %Y) nounwind optsize { +; X64-LABEL: define i32 @length64( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR3]] +; X64-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length64( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR4]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length64( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR4]] +; X64-AVX2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind + ret i32 %m +} + +define i1 @length64_eq(ptr %x, ptr %y) nounwind optsize { +; X64-SSE2-LABEL: length64_eq: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: pushq %rax +; X64-SSE2-NEXT: movl $64, %edx +; X64-SSE2-NEXT: callq memcmp +; X64-SSE2-NEXT: testl %eax, %eax +; X64-SSE2-NEXT: setne %al +; X64-SSE2-NEXT: popq %rcx +; X64-SSE2-NEXT: retq +; +; X64-LABEL: define i1 @length64_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR3]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length64_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr 
[[Y]], i64 32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP10]] +; +; X64-AVX2-LABEL: define i1 @length64_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP10]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_eq_const(ptr %X) nounwind optsize { +; X64-SSE2-LABEL: length64_eq_const: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: pushq %rax +; X64-SSE2-NEXT: movl $.L.str, %esi +; X64-SSE2-NEXT: movl $64, %edx +; X64-SSE2-NEXT: callq memcmp +; X64-SSE2-NEXT: testl %eax, %eax +; X64-SSE2-NEXT: sete %al +; X64-SSE2-NEXT: popq %rcx +; X64-SSE2-NEXT: retq +; +; X64-LABEL: define i1 @length64_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 64) #[[ATTR3]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; 
X64-AVX1-LABEL: define i1 @length64_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX1-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length64_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind optsize { +; X64-LABEL: define i32 @bcmp_length2( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], 
align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: ret i32 [[TMP4]] +; +; X64-AVX1-LABEL: define i32 @bcmp_length2( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: ret i32 [[TMP4]] +; +; X64-AVX2-LABEL: define i32 @bcmp_length2( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: ret i32 [[TMP4]] +; + %m = tail call i32 @bcmp(ptr %X, ptr %Y, i64 2) nounwind + ret i32 %m +} diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-pgso-x32.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-pgso-x32.ll new file mode 100644 index 0000000000000..a8b054cd20e27 --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-pgso-x32.ll @@ -0,0 +1,887 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=cmov < %s | FileCheck %s --check-prefix=X86 +; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=X86-SSE2 + +; This tests codegen time inlining/optimization of memcmp +; rdar://6480398 + +@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1 + +declare dso_local i32 @memcmp(ptr, ptr, i32) +declare dso_local i32 @bcmp(ptr, ptr, i32) + +define i32 @length2(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i32 
@length2( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] !prof [[PROF14:![0-9]+]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-NEXT: ret i32 [[TMP7]] +; +; X86-SSE2-LABEL: define i32 @length2( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] !prof [[PROF14:![0-9]+]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: ret i32 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind + ret i32 %m +} + +define i1 @length2_eq(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i1 @length2_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length2_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i16 
[[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_const(ptr %X) nounwind !prof !14 { +; X86-LABEL: define i1 @length2_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X86-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-NEXT: ret i1 [[TMP2]] +; +; X86-SSE2-LABEL: define i1 @length2_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP2]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR3:[0-9]+]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR3:[0-9]+]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length3(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i32 
@length3( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br label [[ENDBLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE2-LABEL: define i32 @length3( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp 
eq i16 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br label [[ENDBLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind + ret i32 %m +} + +define i1 @length3_eq(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i1 @length3_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X86-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X86-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X86-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X86-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X86-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-NEXT: ret i1 [[TMP12]] +; +; X86-SSE2-LABEL: define i1 @length3_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: 
[[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X86-SSE2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X86-SSE2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X86-SSE2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X86-SSE2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length4(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i32 @length4( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X86-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X86-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X86-NEXT: ret i32 [[TMP9]] +; +; X86-SSE2-LABEL: define i32 @length4( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE2-NEXT: 
[[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-SSE2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X86-SSE2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X86-SSE2-NEXT: ret i32 [[TMP9]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind + ret i32 %m +} + +define i1 @length4_eq(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i1 @length4_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-NEXT: ret i1 [[TMP3]] +; +; X86-SSE2-LABEL: define i1 @length4_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP3]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length4_eq_const(ptr %X) nounwind !prof !14 { +; X86-LABEL: define i1 @length4_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X86-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length4_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X86-SSE2-NEXT: 
[[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length5(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i32 @length5( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br label [[ENDBLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE2-LABEL: define i32 @length5( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, 
i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br label [[ENDBLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind + ret i32 %m +} + +define i1 @length5_eq(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i1 @length5_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X86-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X86-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X86-NEXT: [[TMP11:%.*]] = 
or i32 [[TMP3]], [[TMP10]] +; X86-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X86-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-NEXT: ret i1 [[TMP12]] +; +; X86-SSE2-LABEL: define i1 @length5_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X86-SSE2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X86-SSE2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X86-SSE2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X86-SSE2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length8(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i32 @length8( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 
@llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE2-LABEL: define i32 @length8( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; 
X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind + ret i32 %m +} + +define i1 @length8_eq(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i1 @length8_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length8_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP5:%.*]] = 
getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length8_eq_const(ptr %X) nounwind !prof !14 { +; X86-LABEL: define i1 @length8_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408 +; X86-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1 +; X86-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444 +; X86-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]] +; X86-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +; X86-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-NEXT: ret i1 [[TMP7]] +; +; X86-SSE2-LABEL: define i1 @length8_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408 +; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1 +; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444 +; X86-SSE2-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 
@length12_eq(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i1 @length12_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR4:[0-9]+]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length12_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR4:[0-9]+]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length12(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i32 @length12( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR4]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length12( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR4]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind + ret i32 %m +} + +; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 + +define i32 @length16(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i32 @length16( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR4]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length16( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR4]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) 
nounwind + ret i32 %m +} + +define i1 @length16_eq(ptr %x, ptr %y) nounwind !prof !14 { +; X86-NOSSE-LABEL: length16_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $16 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length16_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR4]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length16_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP3]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_eq_const(ptr %X) nounwind !prof !14 { +; X86-NOSSE-LABEL: length16_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $16 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length16_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 16) #[[ATTR4]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length16_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: 
[[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 + +define i32 @length24(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i32 @length24( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR4]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length24( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR4]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind + ret i32 %m +} + +define i1 @length24_eq(ptr %x, ptr %y) nounwind !prof !14 { +; X86-NOSSE-LABEL: length24_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $24 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length24_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR4]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length24_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: 
[[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_eq_const(ptr %X) nounwind !prof !14 { +; X86-NOSSE-LABEL: length24_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $24 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length24_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 24) #[[ATTR4]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length24_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 68051240286688436651889234231545575736 +; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne 
i128 [[TMP6]], 0 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length32(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i32 @length32( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR4]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length32( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR4]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind + ret i32 %m +} + +; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 + +define i1 @length32_eq(ptr %x, ptr %y) nounwind !prof !14 { +; X86-NOSSE-LABEL: length32_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $32 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length32_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR4]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length32_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], 
i64 16 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_const(ptr %X) nounwind !prof !14 { +; X86-NOSSE-LABEL: length32_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $32 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length32_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 32) #[[ATTR4]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length32_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) 
nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length64(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i32 @length64( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR4]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length64( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR4]] +; X86-SSE2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind + ret i32 %m +} + +define i1 @length64_eq(ptr %x, ptr %y) nounwind !prof !14 { +; X86-LABEL: define i1 @length64_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR4]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length64_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR4]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_eq_const(ptr %X) nounwind !prof !14 { +; X86-LABEL: define i1 @length64_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR4]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length64_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR4]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq 
i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind !prof !14 { +; X86-LABEL: define i32 @bcmp_length2( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-NEXT: ret i32 [[TMP4]] +; +; X86-SSE2-LABEL: define i32 @bcmp_length2( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE2-NEXT: ret i32 [[TMP4]] +; + %m = tail call i32 @bcmp(ptr %X, ptr %Y, i32 2) nounwind + ret i32 %m +} + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i32 10000} +!4 = !{!"MaxCount", i32 10} +!5 = !{!"MaxInternalCount", i32 1} +!6 = !{!"MaxFunctionCount", i32 1000} +!7 = !{!"NumCounts", i32 3} +!8 = !{!"NumFunctions", i32 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i32 100, i32 1} +!12 = !{i32 999000, i32 100, i32 1} +!13 = !{i32 999999, i32 1, i32 2} +!14 = !{!"function_entry_count", i32 0} diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-pgso.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-pgso.ll new file mode 100644 index 0000000000000..1507cbdc4e86e --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-pgso.ll @@ -0,0 +1,1347 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt 
-S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=X64 +; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefix=X64-AVX1 +; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx2 < %s | FileCheck %s --check-prefix=X64-AVX2 + +; This tests codegen time inlining/optimization of memcmp +; rdar://6480398 + +@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1 + +declare dso_local i32 @memcmp(ptr, ptr, i64) +declare dso_local i32 @bcmp(ptr, ptr, i64) + +define i32 @length2(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i32 @length2( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0:[0-9]+]] !prof [[PROF14:![0-9]+]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-NEXT: ret i32 [[TMP7]] +; +; X64-AVX1-LABEL: define i32 @length2( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] !prof [[PROF14:![0-9]+]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: ret i32 [[TMP7]] +; +; X64-AVX2-LABEL: define i32 @length2( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] !prof 
[[PROF14:![0-9]+]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-AVX2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: ret i32 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + ret i32 %m +} + +define i1 @length2_eq(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i1 @length2_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length2_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length2_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, 
ptr %Y, i64 2) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_const(ptr %X) nounwind !prof !14 { +; X64-LABEL: define i1 @length2_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-NEXT: ret i1 [[TMP2]] +; +; X64-AVX1-LABEL: define i1 @length2_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP2]] +; +; X64-AVX2-LABEL: define i1 @length2_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP2]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR2:[0-9]+]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]] +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length2_eq_nobuiltin_attr( +; 
X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR3:[0-9]+]] +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length3(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i32 @length3( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-NEXT: br label [[ENDBLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length3( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i16 
[[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br label [[ENDBLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length3( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label 
[[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br label [[ENDBLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind + ret i32 %m +} + +define i1 @length3_eq(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i1 @length3_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: ret i1 [[TMP12]] +; +; X64-AVX1-LABEL: define i1 @length3_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: 
[[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP12]] +; +; X64-AVX2-LABEL: define i1 @length3_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length4(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i32 @length4( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-NEXT: ret i32 [[TMP9]] +; +; X64-AVX1-LABEL: define i32 @length4( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX1-NEXT: ret i32 [[TMP9]] +; +; X64-AVX2-LABEL: define i32 @length4( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX2-NEXT: ret i32 [[TMP9]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) 
nounwind + ret i32 %m +} + +define i1 @length4_eq(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i1 @length4_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: ret i1 [[TMP3]] +; +; X64-AVX1-LABEL: define i1 @length4_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP3]] +; +; X64-AVX2-LABEL: define i1 @length4_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP3]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length4_eq_const(ptr %X) nounwind !prof !14 { +; X64-LABEL: define i1 @length4_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length4_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 
875770417 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length4_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length5(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i32 @length5( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-NEXT: br label [[ENDBLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi 
i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length5( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br label [[ENDBLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length5( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-AVX2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br label [[ENDBLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + ret i32 %m +} + +define i1 @length5_eq(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i1 @length5_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 
[[TMP12]] to i32 +; X64-NEXT: ret i1 [[TMP12]] +; +; X64-AVX1-LABEL: define i1 @length5_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP12]] +; +; X64-AVX2-LABEL: define i1 @length5_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 
@memcmp(ptr %X, ptr %Y, i64 5) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length8(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i32 @length8( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-NEXT: ret i32 [[TMP9]] +; +; X64-AVX1-LABEL: define i32 @length8( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-AVX1-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-AVX1-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-AVX1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX1-NEXT: ret i32 [[TMP9]] +; +; X64-AVX2-LABEL: define i32 @length8( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-AVX2-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-AVX2-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; 
X64-AVX2-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-AVX2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-AVX2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-AVX2-NEXT: ret i32 [[TMP9]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind + ret i32 %m +} + +define i1 @length8_eq(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i1 @length8_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length8_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length8_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length8_eq_const(ptr %X) nounwind !prof !14 { +; X64-LABEL: define i1 @length8_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; 
X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-NEXT: ret i1 [[TMP2]] +; +; X64-AVX1-LABEL: define i1 @length8_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP2]] +; +; X64-AVX2-LABEL: define i1 @length8_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP2]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length12_eq(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i1 @length12_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: ret i1 [[TMP12]] +; +; X64-AVX1-LABEL: define i1 @length12_eq( +; 
X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP12]] +; +; X64-AVX2-LABEL: define i1 @length12_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 
@length12(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i32 @length12( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length12( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], 
[[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-AVX1-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-AVX1-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-AVX1-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length12( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], 
[[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-AVX2-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-AVX2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-AVX2-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind + ret i32 %m +} + +; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 + +define i32 @length16(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i32 @length16( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: br label [[LOADBB:%.*]] +; X64: res_block: +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], 
[[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64: loadbb1: +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64: endblock: +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX1-LABEL: define i32 @length16( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: br label [[LOADBB:%.*]] +; X64-AVX1: res_block: +; X64-AVX1-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX1-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX1-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX1-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX1: loadbb: +; X64-AVX1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 
+; X64-AVX1-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX1-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-AVX1: loadbb1: +; X64-AVX1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX1-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX1-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX1-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX1: endblock: +; X64-AVX1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX1-NEXT: ret i32 [[PHI_RES]] +; +; X64-AVX2-LABEL: define i32 @length16( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: br label [[LOADBB:%.*]] +; X64-AVX2: res_block: +; X64-AVX2-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-AVX2-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X64-AVX2-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-AVX2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X64-AVX2-NEXT: br label [[ENDBLOCK:%.*]] +; X64-AVX2: loadbb: +; X64-AVX2-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-AVX2-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-AVX2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label 
[[RES_BLOCK:%.*]] +; X64-AVX2: loadbb1: +; X64-AVX2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-AVX2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-AVX2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-AVX2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-AVX2-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-AVX2-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-AVX2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-AVX2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-AVX2: endblock: +; X64-AVX2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-AVX2-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind + ret i32 %m +} + +define i1 @length16_eq(ptr %x, ptr %y) nounwind !prof !14 { +; +; X64-AVX-LABEL: length16_eq: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: setne %al +; X64-AVX-NEXT: retq +; X64-LABEL: define i1 @length16_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: ret i1 [[TMP3]] +; +; X64-AVX1-LABEL: define i1 @length16_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP3]] +; +; X64-AVX2-LABEL: define i1 @length16_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof 
[[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP3]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_eq_const(ptr %X) nounwind !prof !14 { +; +; X64-AVX-LABEL: length16_eq_const: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq +; X64-LABEL: define i1 @length16_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length16_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length16_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind 
+ %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 + +define i32 @length24(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i32 @length24( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length24( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR4:[0-9]+]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length24( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 24) #[[ATTR4:[0-9]+]] +; X64-AVX2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind + ret i32 %m +} + +define i1 @length24_eq(ptr %x, ptr %y) nounwind !prof !14 { +; +; X64-AVX-LABEL: length24_eq: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: retq +; X64-LABEL: define i1 @length24_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-NEXT: 
[[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length24_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-AVX1-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-AVX1-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-AVX1-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-AVX1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length24_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], 
align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; X64-AVX2-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; X64-AVX2-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], [[TMP10]] +; X64-AVX2-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; X64-AVX2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64-AVX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_eq_const(ptr %X) nounwind !prof !14 { +; +; X64-AVX-LABEL: length24_eq_const: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vptest %xmm0, %xmm0 +; X64-AVX-NEXT: setne %al +; X64-AVX-NEXT: retq +; X64-LABEL: define i1 @length24_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-NEXT: ret i1 [[TMP8]] +; +; X64-AVX1-LABEL: define i1 @length24_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 
+; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-AVX1-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-AVX1-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-AVX1-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-AVX1-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP8]] +; +; X64-AVX2-LABEL: define i1 @length24_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; X64-AVX2-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; X64-AVX2-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; X64-AVX2-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; X64-AVX2-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP8]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length32(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i32 @length32( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length32( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR4]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length32( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: 
[[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 32) #[[ATTR4]] +; X64-AVX2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind + ret i32 %m +} + +; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 + +define i1 @length32_eq(ptr %x, ptr %y) nounwind !prof !14 { +; +; X64-LABEL: define i1 @length32_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X64-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X64-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X64-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X64-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length32_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX1-NEXT: ret i1 [[CMP]] +; +; X64-AVX2-LABEL: define i1 @length32_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: 
[[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; X64-AVX2-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_const(ptr %X) nounwind !prof !14 { +; +; X64-LABEL: define i1 @length32_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X64-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X64-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X64-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-NEXT: ret i1 [[TMP7]] +; +; X64-AVX1-LABEL: define i1 @length32_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP2]] +; +; X64-AVX2-LABEL: define i1 @length32_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = icmp ne i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP2]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length64(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i32 @length64( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: 
[[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR0]] +; X64-NEXT: ret i32 [[M]] +; +; X64-AVX1-LABEL: define i32 @length64( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR4]] +; X64-AVX1-NEXT: ret i32 [[M]] +; +; X64-AVX2-LABEL: define i32 @length64( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR4]] +; X64-AVX2-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind + ret i32 %m +} + +define i1 @length64_eq(ptr %x, ptr %y) nounwind !prof !14 { +; +; X64-LABEL: define i1 @length64_eq( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 64) #[[ATTR0]] +; X64-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X64-NEXT: ret i1 [[CMP]] +; +; X64-AVX1-LABEL: define i1 @length64_eq( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX1-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX1-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX1-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX1-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX1-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX1-NEXT: ret i1 [[TMP10]] +; +; X64-AVX2-LABEL: define i1 @length64_eq( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof 
[[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = load i256, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = xor i256 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 32 +; X64-AVX2-NEXT: [[TMP6:%.*]] = load i256, ptr [[TMP4]], align 1 +; X64-AVX2-NEXT: [[TMP7:%.*]] = load i256, ptr [[TMP5]], align 1 +; X64-AVX2-NEXT: [[TMP8:%.*]] = xor i256 [[TMP6]], [[TMP7]] +; X64-AVX2-NEXT: [[TMP9:%.*]] = or i256 [[TMP3]], [[TMP8]] +; X64-AVX2-NEXT: [[TMP10:%.*]] = icmp ne i256 [[TMP9]], 0 +; X64-AVX2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64-AVX2-NEXT: ret i1 [[TMP10]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_eq_const(ptr %X) nounwind !prof !14 { +; +; X64-LABEL: define i1 @length64_eq_const( +; X64-SAME: ptr [[X:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i64 64) #[[ATTR0]] +; X64-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X64-NEXT: ret i1 [[C]] +; +; X64-AVX1-LABEL: define i1 @length64_eq_const( +; X64-AVX1-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX1-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX1-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX1-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX1-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; 
X64-AVX1-NEXT: ret i1 [[C]] +; +; X64-AVX2-LABEL: define i1 @length64_eq_const( +; X64-AVX2-SAME: ptr [[X:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i256, ptr [[X]], align 1 +; X64-AVX2-NEXT: [[TMP2:%.*]] = xor i256 [[TMP1]], 22248533154802671749360035741805466271990224543450513484713781259640245465392 +; X64-AVX2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 32 +; X64-AVX2-NEXT: [[TMP4:%.*]] = load i256, ptr [[TMP3]], align 1 +; X64-AVX2-NEXT: [[TMP5:%.*]] = xor i256 [[TMP4]], 23156637116659864195145731957391441738757757709540232586892941433547502400306 +; X64-AVX2-NEXT: [[TMP6:%.*]] = or i256 [[TMP2]], [[TMP5]] +; X64-AVX2-NEXT: [[TMP7:%.*]] = icmp ne i256 [[TMP6]], 0 +; X64-AVX2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X64-AVX2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; X64-AVX2-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind !prof !14 { +; X64-LABEL: define i32 @bcmp_length2( +; X64-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR0]] !prof [[PROF14]] { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: ret i32 [[TMP4]] +; +; X64-AVX1-LABEL: define i32 @bcmp_length2( +; X64-AVX1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-AVX1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX1-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX1-NEXT: ret i32 [[TMP4]] +; +; X64-AVX2-LABEL: define i32 @bcmp_length2( +; X64-AVX2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] !prof [[PROF14]] { +; X64-AVX2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; 
X64-AVX2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-AVX2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-AVX2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-AVX2-NEXT: ret i32 [[TMP4]] +; + %m = tail call i32 @bcmp(ptr %X, ptr %Y, i64 2) nounwind + ret i32 %m +} + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 0} diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32-2.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32-2.ll new file mode 100644 index 0000000000000..8c86c110c7bb2 --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32-2.ll @@ -0,0 +1,4813 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=cmov < %s | FileCheck %s --check-prefix=X86 +; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse < %s | FileCheck %s --check-prefix=X86-SSE1 +; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=X86-SSE2 +; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s --check-prefix=X86-SSE41 + +; This tests codegen time inlining/optimization of memcmp +; rdar://6480398 + +@.str = private constant [513 x i8] 
c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1 + +declare dso_local i32 @memcmp(ptr, ptr, i32) + +define i32 @length0(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length0( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X86-NEXT: ret i32 0 +; +; X86-SSE1-LABEL: define i32 @length0( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X86-SSE1-NEXT: ret i32 0 +; +; X86-SSE2-LABEL: define i32 @length0( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X86-SSE2-NEXT: ret i32 0 +; +; X86-SSE41-LABEL: define i32 @length0( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; X86-SSE41-NEXT: ret i32 0 +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind + ret i32 %m + } + +define i1 @length0_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length0_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: ret i1 true +; +; X86-SSE1-LABEL: define i1 @length0_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: ret i1 true +; +; X86-SSE2-LABEL: define i1 @length0_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: ret i1 true +; +; X86-SSE41-LABEL: define i1 @length0_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: ret i1 true +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length0_lt(ptr %X, ptr %Y) nounwind { +; X86-LABEL: 
define i1 @length0_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: ret i1 false +; +; X86-SSE1-LABEL: define i1 @length0_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: ret i1 false +; +; X86-SSE2-LABEL: define i1 @length0_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: ret i1 false +; +; X86-SSE41-LABEL: define i1 @length0_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: ret i1 false +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 0) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length2(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length2( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-NEXT: ret i32 [[TMP7]] +; +; X86-SSE1-LABEL: define i32 @length2( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: ret i32 [[TMP7]] +; +; X86-SSE2-LABEL: define i32 @length2( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr 
[[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: ret i32 [[TMP7]] +; +; X86-SSE41-LABEL: define i32 @length2( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: ret i32 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind + ret i32 %m +} + +define i32 @length2_const(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length2_const( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X86-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X86-NEXT: ret i32 [[TMP4]] +; +; X86-SSE1-LABEL: define i32 @length2_const( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE1-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X86-SSE1-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X86-SSE1-NEXT: ret i32 [[TMP4]] +; +; X86-SSE2-LABEL: define i32 @length2_const( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load 
i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X86-SSE2-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X86-SSE2-NEXT: ret i32 [[TMP4]] +; +; X86-SSE41-LABEL: define i32 @length2_const( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE41-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X86-SSE41-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X86-SSE41-NEXT: ret i32 [[TMP4]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind + ret i32 %m +} + +define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length2_gt_const( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X86-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X86-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length2_gt_const( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE1-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X86-SSE1-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X86-SSE1-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length2_gt_const( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 
+; X86-SSE2-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X86-SSE2-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length2_gt_const( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE41-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32 +; X86-SSE41-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 12594 +; X86-SSE41-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP4]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length2_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length2_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length2_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; 
X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length2_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length2_lt(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length2_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length2_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length2_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] 
{ +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length2_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_gt(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length2_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length2_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr 
[[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE1-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE1-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE1-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE1-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length2_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE2-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE2-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE2-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE2-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length2_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X86-SSE41-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X86-SSE41-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X86-SSE41-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X86-SSE41-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_const(ptr %X) nounwind { +; 
X86-LABEL: define i1 @length2_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X86-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-NEXT: ret i1 [[TMP2]] +; +; X86-SSE1-LABEL: define i1 @length2_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X86-SSE1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE1-NEXT: ret i1 [[TMP2]] +; +; X86-SSE2-LABEL: define i1 @length2_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP2]] +; +; X86-SSE41-LABEL: define i1 @length2_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; X86-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP2]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR4:[0-9]+]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR4:[0-9]+]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; 
X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR4:[0-9]+]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length2_eq_nobuiltin_attr( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 2) #[[ATTR4:[0-9]+]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind nobuiltin + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length3(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length3( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br label [[ENDBLOCK]] +; X86: endblock: +; 
X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE1-LABEL: define i32 @length3( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: br label [[LOADBB:%.*]] +; X86-SSE1: res_block: +; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE1: loadbb: +; X86-SSE1-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X86-SSE1-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE1: loadbb1: +; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-SSE1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE1-NEXT: br label [[ENDBLOCK]] +; X86-SSE1: endblock: +; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE1-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE2-LABEL: define i32 @length3( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i16, ptr 
[[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br label [[ENDBLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE41-LABEL: define i32 @length3( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: br label [[LOADBB:%.*]] +; X86-SSE41: res_block: +; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE41: loadbb: +; X86-SSE41-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X86-SSE41-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE41: loadbb1: +; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; 
X86-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: br label [[ENDBLOCK]] +; X86-SSE41: endblock: +; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE41-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind + ret i32 %m +} + +define i1 @length3_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length3_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X86-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X86-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X86-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X86-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X86-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-NEXT: ret i1 [[TMP12]] +; +; X86-SSE1-LABEL: define i1 @length3_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-SSE1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 
+; X86-SSE1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-SSE1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X86-SSE1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X86-SSE1-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X86-SSE1-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X86-SSE1-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-SSE1-NEXT: ret i1 [[TMP12]] +; +; X86-SSE2-LABEL: define i1 @length3_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X86-SSE2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X86-SSE2-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X86-SSE2-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X86-SSE2-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP12]] +; +; X86-SSE41-LABEL: define i1 @length3_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = zext 
i8 [[TMP6]] to i16 +; X86-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X86-SSE41-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X86-SSE41-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X86-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length4(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length4( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X86-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X86-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X86-NEXT: ret i32 [[TMP9]] +; +; X86-SSE1-LABEL: define i32 @length4( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-SSE1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-SSE1-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X86-SSE1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X86-SSE1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X86-SSE1-NEXT: ret i32 [[TMP9]] +; +; X86-SSE2-LABEL: define i32 @length4( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: 
[[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-SSE2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X86-SSE2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X86-SSE2-NEXT: ret i32 [[TMP9]] +; +; X86-SSE41-LABEL: define i32 @length4( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-SSE41-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-SSE41-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X86-SSE41-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X86-SSE41-NEXT: ret i32 [[TMP9]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind + ret i32 %m +} + +define i1 @length4_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length4_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-NEXT: ret i1 [[TMP3]] +; +; X86-SSE1-LABEL: define i1 @length4_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE1-NEXT: ret i1 [[TMP3]] +; +; X86-SSE2-LABEL: define i1 @length4_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP3]] +; +; X86-SSE41-LABEL: define i1 @length4_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP3]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length4_lt(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length4_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-NEXT: ret i1 [[TMP5]] +; +; X86-SSE1-LABEL: define i1 @length4_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE1-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-SSE1-NEXT: 
ret i1 [[TMP5]] +; +; X86-SSE2-LABEL: define i1 @length4_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE2-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-SSE2-NEXT: ret i1 [[TMP5]] +; +; X86-SSE41-LABEL: define i1 @length4_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE41-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X86-SSE41-NEXT: ret i1 [[TMP5]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i1 @length4_gt(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length4_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-NEXT: ret i1 [[TMP5]] +; +; X86-SSE1-LABEL: define i1 @length4_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE1-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE1-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; 
X86-SSE1-NEXT: ret i1 [[TMP5]] +; +; X86-SSE2-LABEL: define i1 @length4_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE2-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-SSE2-NEXT: ret i1 [[TMP5]] +; +; X86-SSE41-LABEL: define i1 @length4_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X86-SSE41-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X86-SSE41-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X86-SSE41-NEXT: ret i1 [[TMP5]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length4_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length4_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X86-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length4_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X86-SSE1-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length4_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; 
X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length4_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; X86-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 4) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length5(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length5( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br label [[ENDBLOCK]] +; X86: endblock: +; X86-NEXT: 
[[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE1-LABEL: define i32 @length5( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: br label [[LOADBB:%.*]] +; X86-SSE1: res_block: +; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE1: loadbb: +; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE1: loadbb1: +; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE1-NEXT: br label [[ENDBLOCK]] +; X86-SSE1: endblock: +; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE1-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE2-LABEL: define i32 @length5( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 
1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br label [[ENDBLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE41-LABEL: define i32 @length5( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: br label [[LOADBB:%.*]] +; X86-SSE41: res_block: +; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE41: loadbb: +; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE41: loadbb1: +; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: 
[[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: br label [[ENDBLOCK]] +; X86-SSE41: endblock: +; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE41-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind + ret i32 %m +} + +define i1 @length5_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length5_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X86-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X86-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X86-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X86-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X86-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-NEXT: ret i1 [[TMP12]] +; +; X86-SSE1-LABEL: define i1 @length5_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; 
X86-SSE1-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-SSE1-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X86-SSE1-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X86-SSE1-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X86-SSE1-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X86-SSE1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-SSE1-NEXT: ret i1 [[TMP12]] +; +; X86-SSE2-LABEL: define i1 @length5_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X86-SSE2-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X86-SSE2-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X86-SSE2-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X86-SSE2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP12]] +; +; X86-SSE41-LABEL: define i1 @length5_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i8 
[[TMP6]] to i32 +; X86-SSE41-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X86-SSE41-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X86-SSE41-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X86-SSE41-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length5_lt(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length5_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br label [[ENDBLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length5_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: 
br label [[LOADBB:%.*]] +; X86-SSE1: res_block: +; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE1: loadbb: +; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE1: loadbb1: +; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE1-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE1-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE1-NEXT: br label [[ENDBLOCK]] +; X86-SSE1: endblock: +; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length5_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 
@llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br label [[ENDBLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length5_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: br label [[LOADBB:%.*]] +; X86-SSE41: res_block: +; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE41: loadbb: +; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X86-SSE41: loadbb1: +; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; 
X86-SSE41-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X86-SSE41-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: br label [[ENDBLOCK]] +; X86-SSE41: endblock: +; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length7(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length7( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br i1 [[TMP14]], label 
[[ENDBLOCK]], label [[RES_BLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE1-LABEL: define i32 @length7( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: br label [[LOADBB:%.*]] +; X86-SSE1: res_block: +; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE1: loadbb: +; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE1: loadbb1: +; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE1: endblock: +; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE1-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE2-LABEL: define i32 @length7( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE41-LABEL: define i32 @length7( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: br label [[LOADBB:%.*]] +; X86-SSE41: res_block: +; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] 
] +; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE41: loadbb: +; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE41: loadbb1: +; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE41: endblock: +; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE41-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind + ret i32 %m +} + +define i1 @length7_lt(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length7_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label 
[[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length7_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: br label [[LOADBB:%.*]] +; X86-SSE1: res_block: +; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE1: loadbb: +; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE1-NEXT: [[TMP7:%.*]] 
= icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE1: loadbb1: +; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE1: endblock: +; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE1-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length7_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE2-NEXT: 
[[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length7_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: br label [[LOADBB:%.*]] +; X86-SSE41: res_block: +; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE41: loadbb: +; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE41: loadbb1: +; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: 
[[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE41: endblock: +; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i1 @length7_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length7_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-NEXT: ret i1 [[TMP10]] +; +; X86-SSE1-LABEL: define i1 @length7_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], 
[[TMP7]] +; X86-SSE1-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-SSE1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE1-NEXT: ret i1 [[TMP10]] +; +; X86-SSE2-LABEL: define i1 @length7_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP10]] +; +; X86-SSE41-LABEL: define i1 @length7_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP10]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 7) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + 
+define i32 @length8(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length8( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: br label [[LOADBB:%.*]] +; X86: res_block: +; X86-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-NEXT: br label [[ENDBLOCK:%.*]] +; X86: loadbb: +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86: loadbb1: +; X86-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86: endblock: +; X86-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE1-LABEL: define i32 @length8( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: br label [[LOADBB:%.*]] +; X86-SSE1: res_block: +; X86-SSE1-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE1-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], 
[[LOADBB1]] ] +; X86-SSE1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE1-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE1: loadbb: +; X86-SSE1-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE1-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE1-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE1: loadbb1: +; X86-SSE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE1-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE1-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE1-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE1-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE1: endblock: +; X86-SSE1-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE1-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE2-LABEL: define i32 @length8( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: br label [[LOADBB:%.*]] +; X86-SSE2: res_block: +; X86-SSE2-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE2-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE2-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE2-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE2: loadbb: +; X86-SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 
+; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE2-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X86-SSE2-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE2: loadbb1: +; X86-SSE2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE2-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE2-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE2-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE2: endblock: +; X86-SSE2-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE2-NEXT: ret i32 [[PHI_RES]] +; +; X86-SSE41-LABEL: define i32 @length8( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: br label [[LOADBB:%.*]] +; X86-SSE41: res_block: +; X86-SSE41-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X86-SSE41-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; X86-SSE41-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X86-SSE41-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; X86-SSE41-NEXT: br label [[ENDBLOCK:%.*]] +; X86-SSE41: loadbb: +; X86-SSE41-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X86-SSE41-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; 
X86-SSE41-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X86-SSE41: loadbb1: +; X86-SSE41-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X86-SSE41-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X86-SSE41-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X86-SSE41-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X86-SSE41-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X86-SSE41-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X86-SSE41: endblock: +; X86-SSE41-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X86-SSE41-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind + ret i32 %m +} + +define i1 @length8_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length8_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length8_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE1-NEXT: 
[[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE1-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE1-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-SSE1-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length8_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length8_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], 
align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 8) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length8_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length8_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408 +; X86-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1 +; X86-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444 +; X86-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]] +; X86-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +; X86-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-NEXT: ret i1 [[TMP7]] +; +; X86-SSE1-LABEL: define i1 @length8_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE1-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408 +; X86-SSE1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1 +; X86-SSE1-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444 +; X86-SSE1-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]] +; X86-SSE1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +; X86-SSE1-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE1-NEXT: ret i1 [[TMP7]] +; +; X86-SSE2-LABEL: define i1 @length8_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408 +; X86-SSE2-NEXT: [[TMP3:%.*]] = 
getelementptr i8, ptr [[X]], i64 4 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1 +; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444 +; X86-SSE2-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP7]] +; +; X86-SSE41-LABEL: define i1 @length8_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 858927408 +; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 1 +; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 926299444 +; X86-SSE41-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]] +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 8) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length9_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length9_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 9) #[[ATTR5:[0-9]+]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length9_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 9) #[[ATTR5:[0-9]+]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length9_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 9) #[[ATTR5:[0-9]+]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; 
X86-SSE41-LABEL: define i1 @length9_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 9) #[[ATTR5:[0-9]+]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length10_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length10_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 10) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length10_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 10) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length10_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 10) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length10_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 10) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 10) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length11_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length11_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 11) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 
@length11_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 11) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length11_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 11) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length11_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 11) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 11) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length12_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length12_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length12_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length12_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length12_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR5]] 
+; X86-SSE41-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length12(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length12( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length12( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length12( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length12( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 12) nounwind + ret i32 %m +} + +define i1 @length13_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length13_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 13) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length13_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 13) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length13_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 13) 
#[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length13_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 13) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 13) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length14_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length14_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 14) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length14_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 14) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length14_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 14) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length14_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 14) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 14) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length15(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length15( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]] +; X86-NEXT: ret i32 
[[M]] +; +; X86-SSE1-LABEL: define i32 @length15( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length15( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length15( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 15) nounwind + ret i32 %m +} + +define i1 @length15_lt(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length15_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp slt i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length15_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp slt i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length15_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp slt i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length15_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp slt i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 15) 
nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length15_const(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length15_const( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length15_const( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length15_const( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length15_const( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) nounwind + ret i32 %m +} + +define i1 @length15_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length15_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length15_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; 
X86-SSE2-LABEL: define i1 @length15_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length15_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 15) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @length15_gt_const( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp sgt i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length15_gt_const( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp sgt i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length15_gt_const( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp sgt i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length15_gt_const( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) #[[ATTR5]] +; 
X86-SSE41-NEXT: [[C:%.*]] = icmp sgt i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 15) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 + +define i32 @length16(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length16( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length16( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length16( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length16( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 16) nounwind + ret i32 %m +} + +define i1 @length16_eq(ptr %x, ptr %y) nounwind { +; X86-NOSSE-LABEL: length16_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $16 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length16_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; 
X86-SSE1-LABEL: define i1 @length16_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length16_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP3]] +; +; X86-SSE41-LABEL: define i1 @length16_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP3]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length16_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length16_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length16_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]] 
+; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length16_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length16_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length16_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length16_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length16_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_eq_const(ptr %X) nounwind { +; X86-NOSSE-LABEL: length16_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $16 +; X86-NOSSE-NEXT: pushl $.L.str +; 
X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length16_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 16) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length16_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 16) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length16_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE2-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length16_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE41-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 16) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 + +define i32 @length24(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length24( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length24( +; 
X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length24( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length24( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 24) nounwind + ret i32 %m +} + +define i1 @length24_eq(ptr %x, ptr %y) nounwind { +; X86-NOSSE-LABEL: length24_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $24 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length24_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length24_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length24_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = 
getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length24_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length24_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length24_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length24_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length24_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length24_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length24_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length24_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 24) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length24_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], 
i32 24) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 24) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_eq_const(ptr %X) nounwind { +; X86-NOSSE-LABEL: length24_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $24 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length24_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 24) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length24_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 24) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length24_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 68051240286688436651889234231545575736 +; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP7]] +; +; X86-SSE41-LABEL: define i1 @length24_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: 
[[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 68051240286688436651889234231545575736 +; X86-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 24) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length31(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length31( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length31( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length31( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length31( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 31) nounwind + ret i32 %m +} + +define i1 @length31_eq(ptr %x, ptr %y) nounwind { +; X86-NOSSE-LABEL: length31_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $31 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: 
sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length31_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length31_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length31_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length31_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; 
X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length31_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length31_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length31_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length31_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length31_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: 
[[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length31_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length31_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length31_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { +; X86-NOSSE-LABEL: length31_eq_prefer128: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $31 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length31_eq_prefer128( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length31_eq_prefer128( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; 
X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 31) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length31_eq_prefer128( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length31_eq_prefer128( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 15 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; 
X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 31) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_eq_const(ptr %X) nounwind { +; X86-NOSSE-LABEL: length31_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $31 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length31_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 31) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length31_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 31) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length31_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP7]] +; +; X86-SSE41-LABEL: define i1 @length31_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; 
X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 15 +; X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 64100044907875699958541276911416849973 +; X86-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 31) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length32(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length32( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length32( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length32( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length32( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 32) nounwind + ret i32 %m +} + +; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 + +define i1 @length32_eq(ptr %x, ptr %y) nounwind { +; X86-NOSSE-LABEL: length32_eq: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $32 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; 
X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length32_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length32_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length32_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length32_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: 
[[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length32_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length32_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length32_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length32_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length32_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail 
call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length32_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length32_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length32_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 32) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { +; X86-NOSSE-LABEL: length32_eq_prefer128: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $32 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length32_eq_prefer128( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length32_eq_prefer128( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call 
i32 @memcmp(ptr [[X]], ptr [[Y]], i32 32) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length32_eq_prefer128( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE2-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X86-SSE2-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE2-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE2-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE2-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE2-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE2-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length32_eq_prefer128( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X86-SSE41-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; X86-SSE41-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE41-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; X86-SSE41-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; X86-SSE41-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; X86-SSE41-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; X86-SSE41-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; X86-SSE41-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; X86-SSE41-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 
@memcmp(ptr %x, ptr %y, i32 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_const(ptr %X) nounwind { +; X86-NOSSE-LABEL: length32_eq_const: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl $32 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $12, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-LABEL: define i1 @length32_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 32) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length32_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 32) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length32_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE2-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; X86-SSE2-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE2-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X86-SSE2-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE2-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X86-SSE2-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE2-NEXT: ret i1 [[TMP7]] +; +; X86-SSE41-LABEL: define i1 @length32_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X86-SSE41-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; X86-SSE41-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; 
X86-SSE41-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; X86-SSE41-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; X86-SSE41-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; X86-SSE41-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; X86-SSE41-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; X86-SSE41-NEXT: ret i1 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 32) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length48(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length48( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length48( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length48( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length48( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 48) nounwind + ret i32 %m +} + +define i1 @length48_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length48_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length48_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], 
ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length48_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length48_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length48_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length48_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length48_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length48_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; 
X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length48_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length48_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length48_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length48_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { +; X86-LABEL: define i1 @length48_eq_prefer128( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length48_eq_prefer128( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 
@memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length48_eq_prefer128( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length48_eq_prefer128( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR2]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 48) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 48) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length48_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 48) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length48_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 48) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length48_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 48) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length48_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 48) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp ne i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail 
call i32 @memcmp(ptr %X, ptr @.str, i32 48) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length63(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length63( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length63( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length63( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length63( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 63) nounwind + ret i32 %m +} + +define i1 @length63_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length63_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length63_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length63_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; 
X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length63_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length63_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length63_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length63_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length63_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length63_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] 
+; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length63_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length63_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length63_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 63) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 63) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length63_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 63) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length63_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 63) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length63_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 63) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length63_eq_const( +; X86-SSE41-SAME: ptr 
[[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 63) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 63) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length64(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length64( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length64( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length64( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length64( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 64) nounwind + ret i32 %m +} + +define i1 @length64_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length64_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length64_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length64_eq( 
+; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length64_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length64_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length64_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length64_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length64_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 
@length64_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length64_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length64_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length64_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length64_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 64) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 64) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length64_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length64_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length64_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] 
= tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length64_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 64) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 64) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length96(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length96( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length96( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length96( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length96( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 96) nounwind + ret i32 %m +} + +define i1 @length96_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length96_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length96_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; 
X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length96_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length96_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length96_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length96_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length96_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length96_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; 
X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length96_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length96_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length96_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length96_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 96) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 96) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length96_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 96) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length96_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 96) 
#[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length96_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 96) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length96_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 96) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 96) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length127(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length127( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length127( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length127( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length127( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 127) nounwind + ret i32 %m +} + +define i1 @length127_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length127_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr 
[[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length127_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length127_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length127_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length127_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length127_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length127_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 
+; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length127_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length127_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length127_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length127_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length127_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 127) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 127) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length127_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 127) 
#[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length127_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 127) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length127_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 127) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length127_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 127) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 127) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length128(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length128( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length128( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length128( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length128( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail 
call i32 @memcmp(ptr %X, ptr %Y, i32 128) nounwind + ret i32 %m +} + +define i1 @length128_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length128_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length128_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length128_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length128_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length128_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length128_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 
[[CMP]] +; +; X86-SSE2-LABEL: define i1 @length128_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length128_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length128_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length128_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length128_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length128_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 128) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 128) nounwind 
+ %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length128_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 128) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length128_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 128) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length128_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 128) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length128_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 128) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 128) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length192(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length192( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length192( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length192( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE2-NEXT: 
ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length192( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 192) nounwind + ret i32 %m +} + +define i1 @length192_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length192_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length192_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length192_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length192_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length192_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: 
define i1 @length192_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length192_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length192_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length192_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length192_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length192_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length192_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) 
#[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 192) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 192) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length192_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 192) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length192_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 192) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length192_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 192) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length192_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 192) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 192) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length255(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length255( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length255( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], 
i32 255) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length255( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length255( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 255) nounwind + ret i32 %m +} + +define i1 @length255_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length255_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length255_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length255_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length255_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define 
i1 @length255_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length255_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length255_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length255_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length255_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length255_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length255_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: 
[[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length255_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 255) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 255) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length255_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 255) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length255_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 255) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length255_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 255) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length255_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 255) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 255) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length256(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length256( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail 
call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length256( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length256( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length256( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 256) nounwind + ret i32 %m +} + +define i1 @length256_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length256_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length256_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length256_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length256_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; 
X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length256_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length256_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length256_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length256_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length256_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length256_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail 
call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length256_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length256_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 256) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 256) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length256_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 256) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length256_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 256) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length256_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 256) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length256_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 256) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = 
tail call i32 @memcmp(ptr %X, ptr @.str, i32 256) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length384(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length384( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length384( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length384( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length384( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 384) nounwind + ret i32 %m +} + +define i1 @length384_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length384_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length384_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length384_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp 
ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length384_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length384_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length384_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length384_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length384_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length384_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr 
[[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length384_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length384_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length384_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 384) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 384) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length384_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 384) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length384_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 384) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length384_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 384) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: 
define i1 @length384_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 384) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 384) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length511(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length511( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length511( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length511( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length511( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 511) nounwind + ret i32 %m +} + +define i1 @length511_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length511_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length511_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; 
X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length511_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length511_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length511_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length511_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length511_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length511_lt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr 
%y, i32 511) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length511_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length511_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length511_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length511_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 511) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 511) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length511_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 511) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @length511_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 511) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: 
define i1 @length511_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 511) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length511_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 511) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 511) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length512(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @length512( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @length512( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @length512( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @length512( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 512) nounwind + ret i32 %m +} + +define i1 @length512_eq(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length512_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-NEXT: ret i1 
[[CMP]] +; +; X86-SSE1-LABEL: define i1 @length512_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length512_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length512_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_lt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length512_lt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length512_lt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length512_lt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length512_lt( +; X86-SSE41-SAME: ptr 
[[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_gt(ptr %x, ptr %y) nounwind { +; X86-LABEL: define i1 @length512_gt( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-NEXT: ret i1 [[CMP]] +; +; X86-SSE1-LABEL: define i1 @length512_gt( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE1-NEXT: ret i1 [[CMP]] +; +; X86-SSE2-LABEL: define i1 @length512_gt( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE2-NEXT: ret i1 [[CMP]] +; +; X86-SSE41-LABEL: define i1 @length512_gt( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 512) #[[ATTR5]] +; X86-SSE41-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; X86-SSE41-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 512) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_eq_const(ptr %X) nounwind { +; X86-LABEL: define i1 @length512_eq_const( +; X86-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 512) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: 
define i1 @length512_eq_const( +; X86-SSE1-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 512) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @length512_eq_const( +; X86-SSE2-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 512) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @length512_eq_const( +; X86-SSE41-SAME: ptr [[X:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr @.str, i32 512) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i32 512) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; This checks that we do not do stupid things with huge sizes. +define i32 @huge_length(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i32 @huge_length( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @huge_length( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @huge_length( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @huge_length( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 
9223372036854775807) nounwind + ret i32 %m +} + +define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind { +; X86-LABEL: define i1 @huge_length_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @huge_length_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @huge_length_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @huge_length_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 -1) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 9223372036854775807) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; This checks non-constant sizes. 
+define i32 @nonconst_length(ptr %X, ptr %Y, i32 %size) nounwind { +; X86-LABEL: define i32 @nonconst_length( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]] +; X86-NEXT: ret i32 [[M]] +; +; X86-SSE1-LABEL: define i32 @nonconst_length( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]] +; X86-SSE1-NEXT: ret i32 [[M]] +; +; X86-SSE2-LABEL: define i32 @nonconst_length( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]] +; X86-SSE2-NEXT: ret i32 [[M]] +; +; X86-SSE41-LABEL: define i32 @nonconst_length( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]] +; X86-SSE41-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 %size) nounwind + ret i32 %m +} + +define i1 @nonconst_length_eq(ptr %X, ptr %Y, i32 %size) nounwind { +; X86-LABEL: define i1 @nonconst_length_eq( +; X86-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] { +; X86-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]] +; X86-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-NEXT: ret i1 [[C]] +; +; X86-SSE1-LABEL: define i1 @nonconst_length_eq( +; X86-SSE1-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] { +; X86-SSE1-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]] +; X86-SSE1-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE1-NEXT: ret i1 [[C]] +; +; X86-SSE2-LABEL: define i1 @nonconst_length_eq( +; X86-SSE2-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] { +; X86-SSE2-NEXT: 
[[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]] +; X86-SSE2-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE2-NEXT: ret i1 [[C]] +; +; X86-SSE41-LABEL: define i1 @nonconst_length_eq( +; X86-SSE41-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[SIZE:%.*]]) #[[ATTR1]] { +; X86-SSE41-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 [[SIZE]]) #[[ATTR5]] +; X86-SSE41-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; X86-SSE41-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 %size) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll index d71ae8be19b66..5a0f4db363536 100644 --- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll @@ -1,64 +1,66 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -expand-memcmp -mtriple=i686-unknown-unknown -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=X32 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=X32 declare i32 @memcmp(ptr nocapture, ptr nocapture, i32) define i32 @cmp2(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp2( -; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) -; X32-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) -; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32 -; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32 -; X32-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] -; X32-NEXT: ret i32 [[TMP9]] +; X32-LABEL: define i32 
@cmp2( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X32-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X32-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X32-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X32-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X32-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X32-NEXT: ret i32 [[TMP7]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 2) ret i32 %call } define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonly align 2 %y) { -; X32-LABEL: @cmp2_align2( -; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 2 -; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 2 -; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) -; X32-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) -; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32 -; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32 -; X32-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] -; X32-NEXT: ret i32 [[TMP9]] +; X32-LABEL: define i32 @cmp2_align2( +; X32-SAME: ptr nocapture readonly align 2 [[X:%.*]], ptr nocapture readonly align 2 [[Y:%.*]]) { +; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 2 +; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 2 +; X32-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X32-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X32-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X32-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X32-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X32-NEXT: ret i32 [[TMP7]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 2) ret i32 %call } define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp3( +; X32-LABEL: define i32 @cmp3( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr 
nocapture readonly [[Y:%.*]]) { ; X32-NEXT: br label [[LOADBB:%.*]] ; X32: res_block: -; X32-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP7:%.*]], [[TMP8:%.*]] +; X32-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] ; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X32-NEXT: br label [[ENDBLOCK:%.*]] ; X32: loadbb: -; X32-NEXT: [[TMP5:%.*]] = load i16, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP6:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]]) -; X32-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]]) -; X32-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]] -; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X32-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X32-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X32-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] ; X32: loadbb1: -; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 2 -; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 2 -; X32-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1 -; X32-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1 -; X32-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32 -; X32-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32 -; X32-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]] +; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X32-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X32-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X32-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X32-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X32-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] ; X32-NEXT: br label [[ENDBLOCK]] ; X32: endblock: -; 
X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X32-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 3) @@ -66,47 +68,49 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp4(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp4( -; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; X32-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) -; X32-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] -; X32-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]] -; X32-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32 -; X32-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32 -; X32-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]] -; X32-NEXT: ret i32 [[TMP11]] +; X32-LABEL: define i32 @cmp4( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X32-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X32-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X32-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X32-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X32-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X32-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X32-NEXT: ret i32 [[TMP9]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 4) ret i32 %call } define i32 @cmp5(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp5( +; X32-LABEL: define i32 @cmp5( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X32-NEXT: br 
label [[LOADBB:%.*]] ; X32: res_block: -; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP7:%.*]], [[TMP8:%.*]] +; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] ; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X32-NEXT: br label [[ENDBLOCK:%.*]] ; X32: loadbb: -; X32-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) -; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) -; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] -; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X32-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X32-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] ; X32: loadbb1: -; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X32-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1 -; X32-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1 -; X32-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32 -; X32-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32 -; X32-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]] +; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X32-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X32-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X32-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X32-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X32-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] ; X32-NEXT: br label [[ENDBLOCK]] ; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], 
[[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X32-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 5) @@ -114,32 +118,33 @@ define i32 @cmp5(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp6( +; X32-LABEL: define i32 @cmp6( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X32-NEXT: br label [[LOADBB:%.*]] ; X32: res_block: -; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ] -; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ] +; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] ; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] ; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X32-NEXT: br label [[ENDBLOCK:%.*]] ; X32: loadbb: -; X32-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) -; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) -; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] -; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X32-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X32-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X32: loadbb1: -; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr 
[[X]], i64 4 -; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X32-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP10]], align 1 -; X32-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP11]], align 1 -; X32-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]]) -; X32-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]]) -; X32-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32 -; X32-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i32 -; X32-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP18]], [[TMP19]] -; X32-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X32-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 1 +; X32-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1 +; X32-NEXT: [[TMP12:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP10]]) +; X32-NEXT: [[TMP13:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP11]]) +; X32-NEXT: [[TMP14]] = zext i16 [[TMP12]] to i32 +; X32-NEXT: [[TMP15]] = zext i16 [[TMP13]] to i32 +; X32-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP14]], [[TMP15]] +; X32-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X32: endblock: ; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X32-NEXT: ret i32 [[PHI_RES]] @@ -149,30 +154,31 @@ define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp7( +; X32-LABEL: define i32 @cmp7( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X32-NEXT: br label [[LOADBB:%.*]] ; X32: res_block: -; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] -; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] 
+; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] ; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] ; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X32-NEXT: br label [[ENDBLOCK:%.*]] ; X32: loadbb: -; X32-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) -; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) -; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] -; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X32-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X32-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X32: loadbb1: -; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 3 -; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 3 -; X32-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 1 -; X32-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 1 -; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]]) -; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]] -; X32-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X32-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X32-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X32-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X32-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], 
[[TMP13]] +; X32-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X32: endblock: ; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X32-NEXT: ret i32 [[PHI_RES]] @@ -182,30 +188,31 @@ define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp8(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp8( +; X32-LABEL: define i32 @cmp8( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X32-NEXT: br label [[LOADBB:%.*]] ; X32: res_block: -; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] -; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] ; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] ; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X32-NEXT: br label [[ENDBLOCK:%.*]] ; X32: loadbb: -; X32-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) -; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) -; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] -; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X32-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X32-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X32: loadbb1: -; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4 
-; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X32-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 1 -; X32-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 1 -; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]]) -; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]] -; X32-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X32-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X32-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X32-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X32-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X32-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X32: endblock: ; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X32-NEXT: ret i32 [[PHI_RES]] @@ -215,8 +222,9 @@ define i32 @cmp8(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp9(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp9( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 9) +; X32-LABEL: define i32 @cmp9( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 9) ; X32-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 9) @@ -224,8 +232,9 @@ define i32 @cmp9(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp10(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp10( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 10) +; X32-LABEL: define i32 @cmp10( +; X32-SAME: ptr nocapture readonly 
[[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 10) ; X32-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 10) @@ -233,8 +242,9 @@ define i32 @cmp10(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp11(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp11( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 11) +; X32-LABEL: define i32 @cmp11( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 11) ; X32-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 11) @@ -242,8 +252,9 @@ define i32 @cmp11(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp12(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp12( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 12) +; X32-LABEL: define i32 @cmp12( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) ; X32-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 12) @@ -251,8 +262,9 @@ define i32 @cmp12(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp13(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp13( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 13) +; X32-LABEL: define i32 @cmp13( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 13) ; X32-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 13) @@ -260,8 +272,9 @@ define i32 @cmp13(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define 
i32 @cmp14(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp14( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 14) +; X32-LABEL: define i32 @cmp14( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 14) ; X32-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 14) @@ -269,8 +282,9 @@ define i32 @cmp14(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp15(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp15( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 15) +; X32-LABEL: define i32 @cmp15( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) ; X32-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 15) @@ -278,8 +292,9 @@ define i32 @cmp15(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp16(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp16( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 16) +; X32-LABEL: define i32 @cmp16( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) ; X32-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 16) @@ -287,12 +302,13 @@ define i32 @cmp16(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp_eq2( -; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = zext i1 
[[TMP5]] to i32 -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0 +; X32-LABEL: define i32 @cmp_eq2( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X32-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X32-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -303,21 +319,22 @@ define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp_eq3( -; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 2 -; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 2 -; X32-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1 -; X32-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16 -; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16 -; X32-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]] -; X32-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]] -; X32-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0 -; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X32-LABEL: define i32 @cmp_eq3( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X32-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X32-NEXT: [[TMP6:%.*]] = load i8, ptr 
[[TMP4]], align 1 +; X32-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X32-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X32-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X32-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X32-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X32-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X32-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -328,12 +345,13 @@ define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq4(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp_eq4( -; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0 +; X32-LABEL: define i32 @cmp_eq4( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X32-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X32-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -344,21 +362,22 @@ define i32 @cmp_eq4(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp_eq5( -; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; 
X32-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1 -; X32-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32 -; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32 -; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]] -; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]] -; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X32-LABEL: define i32 @cmp_eq5( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X32-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X32-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X32-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X32-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X32-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X32-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X32-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -369,21 +388,22 @@ define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp_eq6( -; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X32-NEXT: 
[[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1 -; X32-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1 -; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32 -; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32 -; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]] -; X32-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]] -; X32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -; X32-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X32-LABEL: define i32 @cmp_eq6( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X32-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X32-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32 +; X32-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32 +; X32-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X32-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X32-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -394,21 +414,22 @@ define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture readonly align 4 %y) { -; X32-LABEL: @cmp_eq6_align4( -; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 4 -; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 4 -; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X32-NEXT: [[TMP7:%.*]] = getelementptr 
i8, ptr [[Y]], i64 4 -; X32-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 4 -; X32-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 4 -; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32 -; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32 -; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]] -; X32-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]] -; X32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -; X32-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X32-LABEL: define i32 @cmp_eq6_align4( +; X32-SAME: ptr nocapture readonly align 4 [[X:%.*]], ptr nocapture readonly align 4 [[Y:%.*]]) { +; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4 +; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 4 +; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X32-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 4 +; X32-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 4 +; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32 +; X32-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32 +; X32-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X32-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X32-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -419,19 +440,20 @@ define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture read } define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp_eq7( -; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3 -; X32-NEXT: 
[[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3 -; X32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1 -; X32-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1 -; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]] -; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]] -; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X32-LABEL: define i32 @cmp_eq7( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X32-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X32-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X32-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X32-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -442,19 +464,20 @@ define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq8(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp_eq8( -; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1 -; X32-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1 -; X32-NEXT: [[TMP12:%.*]] = xor i32 
[[TMP10]], [[TMP11]] -; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]] -; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X32-LABEL: define i32 @cmp_eq8( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X32-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X32-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X32-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X32-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -465,8 +488,9 @@ define i32 @cmp_eq8(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq9(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp_eq9( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 9) +; X32-LABEL: define i32 @cmp_eq9( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 9) ; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] @@ -478,8 +502,9 @@ define i32 @cmp_eq9(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq10(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp_eq10( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr 
[[Y:%.*]], i32 10) +; X32-LABEL: define i32 @cmp_eq10( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 10) ; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] @@ -491,8 +516,9 @@ define i32 @cmp_eq10(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq11(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp_eq11( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 11) +; X32-LABEL: define i32 @cmp_eq11( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 11) ; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] @@ -504,8 +530,9 @@ define i32 @cmp_eq11(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq12(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp_eq12( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 12) +; X32-LABEL: define i32 @cmp_eq12( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 12) ; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] @@ -517,8 +544,9 @@ define i32 @cmp_eq12(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq13(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp_eq13( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 13) +; X32-LABEL: define i32 @cmp_eq13( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; 
X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 13) ; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] @@ -530,8 +558,9 @@ define i32 @cmp_eq13(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq14(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp_eq14( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 14) +; X32-LABEL: define i32 @cmp_eq14( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 14) ; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] @@ -543,8 +572,9 @@ define i32 @cmp_eq14(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq15(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp_eq15( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 15) +; X32-LABEL: define i32 @cmp_eq15( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 15) ; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] @@ -556,8 +586,9 @@ define i32 @cmp_eq15(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq16(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X32-LABEL: @cmp_eq16( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X:%.*]], ptr [[Y:%.*]], i32 16) +; X32-LABEL: define i32 @cmp_eq16( +; X32-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i32 16) ; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; X32-NEXT: 
[[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll index f686e29975564..99100aad3ee84 100644 --- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll @@ -1,66 +1,67 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_1LD -; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_2LD +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_1LD ; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_2LD declare i32 @memcmp(ptr nocapture, ptr nocapture, i64) define i32 @cmp2(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp2( -; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) -; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32 -; X64-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32 -; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] -; X64-NEXT: ret i32 [[TMP9]] +; X64-LABEL: define i32 @cmp2( +; X64-SAME: ptr 
nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-NEXT: ret i32 [[TMP7]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 2) ret i32 %call } define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonly align 2 %y) { -; X64-LABEL: @cmp2_align2( -; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 2 -; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 2 -; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) -; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32 -; X64-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32 -; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] -; X64-NEXT: ret i32 [[TMP9]] +; X64-LABEL: define i32 @cmp2_align2( +; X64-SAME: ptr nocapture readonly align 2 [[X:%.*]], ptr nocapture readonly align 2 [[Y:%.*]]) { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 2 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 2 +; X64-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-NEXT: ret i32 [[TMP7]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 2) ret i32 %call } define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp3( +; X64-LABEL: define i32 @cmp3( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly 
[[Y:%.*]]) { ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP7:%.*]], [[TMP8:%.*]] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i16, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 2 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 2 -; X64-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32 -; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32 -; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] ; X64-NEXT: br label [[ENDBLOCK]] ; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] 
= phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 3) @@ -68,47 +69,49 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp4(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp4( -; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) -; X64-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] -; X64-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]] -; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32 -; X64-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32 -; X64-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]] -; X64-NEXT: ret i32 [[TMP11]] +; X64-LABEL: define i32 @cmp4( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-NEXT: ret i32 [[TMP9]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 4) ret i32 %call } define i32 @cmp5(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp5( +; X64-LABEL: define i32 @cmp5( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64-NEXT: br label [[LOADBB:%.*]] ; 
X64: res_block: -; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP7:%.*]], [[TMP8:%.*]] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X64-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32 -; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32 -; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] ; X64-NEXT: br label [[ENDBLOCK]] ; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], 
[[RES_BLOCK]] ] +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 5) @@ -116,32 +119,33 @@ define i32 @cmp5(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp6( +; X64-LABEL: define i32 @cmp6( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X64-NEXT: 
[[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X64-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]]) -; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]]) -; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32 -; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i32 -; X64-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP18]], [[TMP19]] -; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP10]]) +; X64-NEXT: [[TMP13:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP11]]) +; X64-NEXT: [[TMP14]] = zext i16 [[TMP12]] to i32 +; X64-NEXT: [[TMP15]] = zext i16 [[TMP13]] to i32 +; X64-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP14]], [[TMP15]] +; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -151,30 +155,31 @@ define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp7( +; X64-LABEL: define i32 @cmp7( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: 
[[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 3 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 3 -; X64-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X64-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]]) -; X64-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]] -; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; 
X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -184,47 +189,49 @@ define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp8(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp8( -; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) -; X64-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]] -; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]] -; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32 -; X64-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32 -; X64-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]] -; X64-NEXT: ret i32 [[TMP11]] +; X64-LABEL: define i32 @cmp8( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-NEXT: ret i32 [[TMP9]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 8) ret i32 %call } define i32 @cmp9(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp9( +; X64-LABEL: define i32 @cmp9( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[TMP1:%.*]] = 
icmp ult i64 [[TMP7:%.*]], [[TMP8:%.*]] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP5:%.*]], [[TMP6:%.*]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; X64-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32 -; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32 -; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] ; X64-NEXT: br label [[ENDBLOCK]] ; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[PHI_RES:%.*]] 
= phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 9) @@ -232,32 +239,33 @@ define i32 @cmp9(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp10(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp10( +; X64-LABEL: define i32 @cmp10( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], 
i64 8 -; X64-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]]) -; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]]) -; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i64 -; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i64 -; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]] -; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP10]]) +; X64-NEXT: [[TMP13:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP11]]) +; X64-NEXT: [[TMP14]] = zext i16 [[TMP12]] to i64 +; X64-NEXT: [[TMP15]] = zext i16 [[TMP13]] to i64 +; X64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -267,30 +275,31 @@ define i32 @cmp10(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp11(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp11( +; X64-LABEL: define i32 @cmp11( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] 
], [ [[TMP13:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 3 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 3 -; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]]) -; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]]) -; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]] -; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label 
[[RES_BLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -300,32 +309,33 @@ define i32 @cmp11(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp12(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp12( +; X64-LABEL: define i32 @cmp12( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 8 
-; X64-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X64-NEXT: [[TMP17:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP15]]) -; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64 -; X64-NEXT: [[TMP19]] = zext i32 [[TMP17]] to i64 -; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]] -; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; X64-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; X64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -335,30 +345,31 @@ define i32 @cmp12(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp13(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp13( +; X64-LABEL: define i32 @cmp13( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ 
[[TMP13:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 5 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 5 -; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]]) -; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]]) -; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]] -; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] 
; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -368,30 +379,31 @@ define i32 @cmp13(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp14(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp14( +; X64-LABEL: define i32 @cmp14( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 6 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 6 -; X64-NEXT: 
[[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]]) -; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]]) -; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]] -; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -401,30 +413,31 @@ define i32 @cmp14(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp15(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp15( +; X64-LABEL: define i32 @cmp15( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: 
[[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 7 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 7 -; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]]) -; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]]) -; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]] -; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -434,30 +447,31 @@ define i32 @cmp15(ptr nocapture readonly %x, ptr nocapture 
readonly %y) { } define i32 @cmp16(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp16( +; X64-LABEL: define i32 @cmp16( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]]) -; X64-NEXT: [[TMP17]] = call i64 
@llvm.bswap.i64(i64 [[TMP15]]) -; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]] -; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -467,12 +481,13 @@ define i32 @cmp16(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp_eq2( -; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]] -; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 -; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0 +; X64-LABEL: define i32 @cmp_eq2( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 ; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64-NEXT: ret i32 [[CONV]] ; @@ -483,43 +498,45 @@ define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64_1LD-LABEL: @cmp_eq3( +; X64_1LD-LABEL: define i32 
@cmp_eq3( +; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64_1LD-NEXT: br label [[LOADBB:%.*]] ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 2 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 2 -; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]] -; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 ; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_1LD-NEXT: ret i32 [[CONV]] ; -; X64_2LD-LABEL: @cmp_eq3( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: 
[[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 2 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 2 -; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16 -; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16 -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]] -; X64_2LD-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]] -; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0 -; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-LABEL: define i32 @cmp_eq3( +; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64_2LD-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64_2LD-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64_2LD-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64_2LD-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -530,12 +547,13 @@ define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq4(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp_eq4( -; X64-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] -; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 -; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0 +; X64-LABEL: define i32 @cmp_eq4( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 ; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64-NEXT: ret i32 [[CONV]] ; @@ -546,43 +564,45 @@ define i32 @cmp_eq4(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64_1LD-LABEL: @cmp_eq5( +; X64_1LD-LABEL: define i32 @cmp_eq5( +; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64_1LD-NEXT: br label [[LOADBB:%.*]] ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 
1 -; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]] -; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 ; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_1LD-NEXT: ret i32 [[CONV]] ; -; X64_2LD-LABEL: @cmp_eq5( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32 -; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32 -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]] -; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]] -; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-LABEL: define i32 @cmp_eq5( +; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64_2LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: 
[[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64_2LD-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64_2LD-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64_2LD-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -593,43 +613,45 @@ define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64_1LD-LABEL: @cmp_eq6( +; X64_1LD-LABEL: define i32 @cmp_eq6( +; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64_1LD-NEXT: br label [[LOADBB:%.*]] ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, ptr 
[[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 ; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_1LD-NEXT: ret i32 [[CONV]] ; -; X64_2LD-LABEL: @cmp_eq6( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32 -; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32 -; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]] -; X64_2LD-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]] -; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64_2LD-LABEL: define i32 @cmp_eq6( +; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64_2LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], 
[[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32 +; X64_2LD-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32 +; X64_2LD-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64_2LD-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -640,43 +662,45 @@ define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture readonly align 4 %y) { -; X64_1LD-LABEL: @cmp_eq6_align4( +; X64_1LD-LABEL: define i32 @cmp_eq6_align4( +; X64_1LD-SAME: ptr nocapture readonly align 4 [[X:%.*]], ptr nocapture readonly align 4 [[Y:%.*]]) { ; X64_1LD-NEXT: br label [[LOADBB:%.*]] ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 4 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 4 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 4 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X64_1LD-NEXT: [[TMP10:%.*]] = 
load i16, ptr [[TMP6]], align 4 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 4 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 4 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 4 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 ; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_1LD-NEXT: ret i32 [[CONV]] ; -; X64_2LD-LABEL: @cmp_eq6_align4( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 4 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 4 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 4 -; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 4 -; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32 -; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32 -; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]] -; X64_2LD-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]] -; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64_2LD-LABEL: define i32 @cmp_eq6_align4( +; X64_2LD-SAME: ptr nocapture readonly align 4 [[X:%.*]], ptr nocapture readonly align 4 [[Y:%.*]]) { +; X64_2LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4 +; 
X64_2LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 4 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 4 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 4 +; X64_2LD-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32 +; X64_2LD-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32 +; X64_2LD-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64_2LD-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -687,41 +711,43 @@ define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture read } define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64_1LD-LABEL: @cmp_eq7( +; X64_1LD-LABEL: define i32 @cmp_eq7( +; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64_1LD-NEXT: br label [[LOADBB:%.*]] ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3 -; X64_1LD-NEXT: [[TMP7:%.*]] = 
getelementptr i8, ptr [[Y]], i64 3 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 ; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_1LD-NEXT: ret i32 [[CONV]] ; -; X64_2LD-LABEL: @cmp_eq7( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]] -; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]] -; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-LABEL: define i32 @cmp_eq7( +; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64_2LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; X64_2LD-NEXT: 
[[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64_2LD-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -732,12 +758,13 @@ define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq8(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp_eq8( -; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 -; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0 +; X64-LABEL: define i32 @cmp_eq8( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 ; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64-NEXT: ret i32 [[CONV]] ; @@ -748,43 +775,45 @@ define i32 @cmp_eq8(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq9(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64_1LD-LABEL: @cmp_eq9( +; X64_1LD-LABEL: define i32 @cmp_eq9( +; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64_1LD-NEXT: br label 
[[LOADBB:%.*]] ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]] -; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 ; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_1LD-NEXT: ret i32 [[CONV]] ; -; X64_2LD-LABEL: @cmp_eq9( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64_2LD-NEXT: 
[[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i64 -; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i64 -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]] -; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]] -; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0 -; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-LABEL: define i32 @cmp_eq9( +; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64_2LD-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64_2LD-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64_2LD-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -795,43 +824,45 @@ define i32 @cmp_eq9(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq10(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64_1LD-LABEL: @cmp_eq10( +; X64_1LD-LABEL: define i32 @cmp_eq10( +; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64_1LD-NEXT: br 
label [[LOADBB:%.*]] ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 ; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_1LD-NEXT: ret i32 [[CONV]] ; -; X64_2LD-LABEL: @cmp_eq10( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8 
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64 -; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64 -; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]] -; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]] -; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0 -; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64_2LD-LABEL: define i32 @cmp_eq10( +; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; X64_2LD-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64_2LD-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64_2LD-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -842,41 +873,43 @@ define i32 @cmp_eq10(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq11(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64_1LD-LABEL: @cmp_eq11( +; X64_1LD-LABEL: define i32 @cmp_eq11( +; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly 
[[Y:%.*]]) { ; X64_1LD-NEXT: br label [[LOADBB:%.*]] ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 ; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_1LD-NEXT: ret i32 [[CONV]] ; -; X64_2LD-LABEL: @cmp_eq11( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = 
getelementptr i8, ptr [[X]], i64 3 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]] -; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]] -; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0 -; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-LABEL: define i32 @cmp_eq11( +; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64_2LD-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -887,43 +920,45 @@ define i32 @cmp_eq11(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq12(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64_1LD-LABEL: @cmp_eq12( +; X64_1LD-LABEL: define i32 @cmp_eq12( +; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64_1LD-NEXT: br label [[LOADBB:%.*]] ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; 
X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 ; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_1LD-NEXT: ret i32 [[CONV]] ; -; X64_2LD-LABEL: @cmp_eq12( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP11:%.*]] = load 
i32, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64 -; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64 -; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]] -; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]] -; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0 -; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64_2LD-LABEL: define i32 @cmp_eq12( +; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64_2LD-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64_2LD-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64_2LD-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -934,41 +969,43 @@ define i32 @cmp_eq12(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq13(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64_1LD-LABEL: @cmp_eq13( +; X64_1LD-LABEL: define i32 @cmp_eq13( +; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64_1LD-NEXT: br label [[LOADBB:%.*]] ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load 
i64, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 5 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 5 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 ; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_1LD-NEXT: ret i32 [[CONV]] ; -; X64_2LD-LABEL: @cmp_eq13( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 5 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 5 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1 -; 
X64_2LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]] -; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]] -; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0 -; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-LABEL: define i32 @cmp_eq13( +; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64_2LD-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -979,41 +1016,43 @@ define i32 @cmp_eq13(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq14(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64_1LD-LABEL: @cmp_eq14( +; X64_1LD-LABEL: define i32 @cmp_eq14( +; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64_1LD-NEXT: br label [[LOADBB:%.*]] ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label 
[[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 6 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 6 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 ; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_1LD-NEXT: ret i32 [[CONV]] ; -; X64_2LD-LABEL: @cmp_eq14( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 6 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 6 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]] -; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]] -; X64_2LD-NEXT: 
[[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0 -; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-LABEL: define i32 @cmp_eq14( +; X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64_2LD-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -1024,41 +1063,43 @@ define i32 @cmp_eq14(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq15(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64_1LD-LABEL: @cmp_eq15( +; X64_1LD-LABEL: define i32 @cmp_eq15( +; X64_1LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { ; X64_1LD-NEXT: br label [[LOADBB:%.*]] ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 
[[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 7 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 ; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_1LD-NEXT: ret i32 [[CONV]] ; -; X64_2LD-LABEL: @cmp_eq15( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 7 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]] -; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]] -; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0 -; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-LABEL: define i32 @cmp_eq15( +; 
X64_2LD-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64_2LD-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -1069,12 +1110,13 @@ define i32 @cmp_eq15(ptr nocapture readonly %x, ptr nocapture readonly %y) { } define i32 @cmp_eq16(ptr nocapture readonly %x, ptr nocapture readonly %y) { -; X64-LABEL: @cmp_eq16( -; X64-NEXT: [[TMP3:%.*]] = load i128, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP5:%.*]] = icmp ne i128 [[TMP3]], [[TMP4]] -; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 -; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0 +; X64-LABEL: define i32 @cmp_eq16( +; X64-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 ; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64-NEXT: ret i32 [[CONV]] ; diff --git a/llvm/test/Transforms/PhaseOrdering/PowerPC/lit.local.cfg 
b/llvm/test/Transforms/PhaseOrdering/PowerPC/lit.local.cfg new file mode 100644 index 0000000000000..091332439b186 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/PowerPC/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'PowerPC' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/PhaseOrdering/X86/memcmp-early.ll b/llvm/test/Transforms/PhaseOrdering/X86/memcmp-early.ll new file mode 100644 index 0000000000000..b4f7780444b25 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/memcmp-early.ll @@ -0,0 +1,105 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -O2 -S -mtriple=x86_64-unknown-unknown < %s | FileCheck %s + + +; Examples of when moving memcmp expansion earlier in the pipeline are beneficial + +@s1 = internal global ptr @.str, align 8 +@s2 = internal global ptr @.str.1, align 8 +@s3 = internal global ptr @.str.2, align 8 +@.str = private unnamed_addr constant [9 x i8] c"01234000\00", align 1 +@.str.1 = private unnamed_addr constant [9 x i8] c"0123!000\00", align 1 +@.str.2 = private unnamed_addr constant [9 x i8] c"0123?000\00", align 1 + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local i32 @memcmp_same_prefix_consts(ptr noundef %x) #0 { +; CHECK-LABEL: define dso_local noundef i32 @memcmp_same_prefix_consts( +; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[TMP0]], 858927408 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 52 +; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP1]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; 
CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[IF_END8:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[TMP9]], 858927408 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = zext i8 [[TMP12]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = xor i32 [[TMP13]], 33 +; CHECK-NEXT: [[TMP15:%.*]] = or i32 [[TMP10]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 +; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i32 [[TMP17]], 0 +; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[IF_END8]], label [[IF_THEN3:%.*]] +; CHECK: if.then3: +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP19:%.*]] = xor i32 [[TMP18]], 858927408 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP21:%.*]] = load i8, ptr [[TMP20]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = zext i8 [[TMP21]] to i32 +; CHECK-NEXT: [[TMP23:%.*]] = xor i32 [[TMP22]], 63 +; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP19]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +; CHECK-NEXT: [[TMP26:%.*]] = zext i1 [[TMP25]] to i32 +; CHECK-NEXT: [[CMP5_NOT:%.*]] = icmp eq i32 [[TMP26]], 0 +; CHECK-NEXT: br i1 [[CMP5_NOT]], label [[IF_END8]], label [[RETURN:%.*]] +; CHECK: if.end8: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[IF_END8]] ], [ 42, [[IF_THEN3]] ] +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +entry: + %retval = alloca i32, align 4 + %x.addr = alloca ptr, align 8 + store ptr %x, ptr %x.addr, align 8 + %0 = load ptr, ptr %x.addr, align 8 + %1 = load ptr, ptr @s1, align 8 + %call = call i32 @memcmp(ptr noundef %0, ptr noundef %1, i64 noundef 5) #2 + %cmp = icmp ne i32 %call, 0 + br i1 %cmp, label 
%if.then, label %if.end8 + +if.then: ; preds = %entry + %2 = load ptr, ptr %x.addr, align 8 + %3 = load ptr, ptr @s2, align 8 + %call1 = call i32 @memcmp(ptr noundef %2, ptr noundef %3, i64 noundef 5) #2 + %cmp2 = icmp ne i32 %call1, 0 + br i1 %cmp2, label %if.then3, label %if.end7 + +if.then3: ; preds = %if.then + %4 = load ptr, ptr %x.addr, align 8 + %5 = load ptr, ptr @s3, align 8 + %call4 = call i32 @memcmp(ptr noundef %4, ptr noundef %5, i64 noundef 5) #2 + %cmp5 = icmp ne i32 %call4, 0 + br i1 %cmp5, label %if.then6, label %if.end + +if.then6: ; preds = %if.then3 + store i32 42, ptr %retval, align 4 + br label %return + +if.end: ; preds = %if.then3 + br label %if.end7 + +if.end7: ; preds = %if.end, %if.then + br label %if.end8 + +if.end8: ; preds = %if.end7, %entry + store i32 0, ptr %retval, align 4 + br label %return + +return: ; preds = %if.end8, %if.then6 + %6 = load i32, ptr %retval, align 4 + ret i32 %6 +} + +; Function Attrs: nounwind willreturn memory(read) +declare i32 @memcmp(ptr noundef, ptr noundef, i64 noundef) #1 + diff --git a/llvm/test/Transforms/PhaseOrdering/X86/memcmp-mergeexpand.ll b/llvm/test/Transforms/PhaseOrdering/X86/memcmp-mergeexpand.ll new file mode 100644 index 0000000000000..2de1f8576f631 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/memcmp-mergeexpand.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S --passes=mergeicmps,expand-memcmp -mtriple=i686-unknown-linux < %s | FileCheck %s --check-prefix=X86 +; RUN: opt -S --passes=mergeicmps,expand-memcmp -mtriple=x86_64-unknown-linux < %s | FileCheck %s --check-prefix=X64 + +; This tests interaction between MergeICmp and ExpandMemCmp. 
+ +%"struct.std::pair" = type { i32, i32 } + +define zeroext i1 @opeq1( +; X86-LABEL: define zeroext i1 @opeq1( +; X86-SAME: ptr nocapture readonly dereferenceable(8) [[A:%.*]], ptr nocapture readonly dereferenceable(8) [[B:%.*]]) local_unnamed_addr { +; X86-NEXT: "entry+land.rhs.i": +; X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 1 +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 1 +; X86-NEXT: [[TMP2:%.*]] = xor i32 [[TMP0]], [[TMP1]] +; X86-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A]], i64 4 +; X86-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 4 +; X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 1 +; X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X86-NEXT: [[TMP7:%.*]] = xor i32 [[TMP5]], [[TMP6]] +; X86-NEXT: [[TMP8:%.*]] = or i32 [[TMP2]], [[TMP7]] +; X86-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +; X86-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; X86-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0 +; X86-NEXT: br label [[OPEQ1_EXIT:%.*]] +; X86: opeq1.exit: +; X86-NEXT: ret i1 [[TMP11]] +; +; X64-LABEL: define zeroext i1 @opeq1( +; X64-SAME: ptr nocapture readonly dereferenceable(8) [[A:%.*]], ptr nocapture readonly dereferenceable(8) [[B:%.*]]) local_unnamed_addr { +; X64-NEXT: "entry+land.rhs.i": +; X64-NEXT: [[TMP0:%.*]] = load i64, ptr [[A]], align 1 +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[B]], align 1 +; X64-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP0]], [[TMP1]] +; X64-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; X64-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 +; X64-NEXT: br label [[OPEQ1_EXIT:%.*]] +; X64: opeq1.exit: +; X64-NEXT: ret i1 [[TMP4]] +; + %"struct.std::pair"* nocapture readonly dereferenceable(8) %a, + %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { +entry: + %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 + %0 = load i32, i32* %first.i, align 4 + %first1.i = getelementptr inbounds %"struct.std::pair", 
%"struct.std::pair"* %b, i64 0, i32 0 + %1 = load i32, i32* %first1.i, align 4 + %cmp.i = icmp eq i32 %0, %1 + br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit + +land.rhs.i: + %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 + %2 = load i32, i32* %second.i, align 4 + %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 + %3 = load i32, i32* %second2.i, align 4 + %cmp3.i = icmp eq i32 %2, %3 + br label %opeq1.exit + +opeq1.exit: + %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ] + ret i1 %4 +} diff --git a/llvm/test/Transforms/PhaseOrdering/X86/memcmp.ll b/llvm/test/Transforms/PhaseOrdering/X86/memcmp.ll new file mode 100644 index 0000000000000..a9dbf5cf4b58e --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/memcmp.ll @@ -0,0 +1,915 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -O2 -S -mtriple=x86_64-unknown-unknown < %s | FileCheck %s + +@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1 + +declare i32 @memcmp(ptr, ptr, i64) + +declare i32 @bcmp(ptr, ptr, i64) + +; Function Attrs: nounwind +define i32 @length0(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define noundef i32 @length0( +; CHECK-SAME: ptr nocapture readnone [[X:%.*]], ptr nocapture readnone [[Y:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: ret i32 0 +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) #0 + ret i32 %m +} + +; Function Attrs: nounwind +define i1 @length0_eq(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define noundef i1 @length0_eq( +; CHECK-SAME: ptr nocapture readnone [[X:%.*]], ptr nocapture readnone [[Y:%.*]]) local_unnamed_addr #[[ATTR1]] { +; CHECK-NEXT: ret i1 true +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) #0 + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i1 @length0_lt(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: 
define noundef i1 @length0_lt( +; CHECK-SAME: ptr nocapture readnone [[X:%.*]], ptr nocapture readnone [[Y:%.*]]) local_unnamed_addr #[[ATTR1]] { +; CHECK-NEXT: ret i1 false +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) #0 + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i32 @length2(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i32 @length2( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: ret i32 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) #0 + ret i32 %m +} + +; Function Attrs: nounwind +define i1 @length2_eq(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @length2_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) #0 + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i1 @length2_lt(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @length2_lt( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = 
load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[TMP7]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) #0 + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i1 @length2_gt(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @length2_gt( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[TMP7]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) #0 + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i1 @length2_eq_const(ptr %X) #0 { +; CHECK-LABEL: define i1 @length2_eq_const( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i16 [[TMP1]], 12849 +; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) #0 + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr 
%Y) #0 { +; CHECK-LABEL: define i1 @length2_eq_nobuiltin_attr( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 2) #[[ATTR4:[0-9]+]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) #1 + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i32 @length3(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i32 @length3( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: loadbb: +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i16 [[TMP2]], [[TMP3]] +; CHECK-NEXT: br i1 [[TMP4]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i16 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP12]] +; CHECK-NEXT: br label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP13]], [[LOADBB1]] ], [ [[TMP6]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) #0 + ret i32 %m +} + +; 
Function Attrs: nounwind +define i1 @length3_eq(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @length3_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; CHECK-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) #0 + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i32 @length4(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i32 @length4( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; CHECK-NEXT: ret i32 [[TMP9]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) #0 + ret i32 %m 
+} + +; Function Attrs: nounwind +define i1 @length4_eq(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @length4_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: ret i1 [[TMP3]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) #0 + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i1 @length4_lt(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @length4_lt( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret i1 [[TMP5]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) #0 + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i1 @length4_gt(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @length4_gt( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret i1 [[TMP5]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) #0 + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind 
+define i1 @length4_eq_const(ptr %X) #0 { +; CHECK-LABEL: define i1 @length4_eq_const( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 875770417 +; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) #0 + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i32 @length5(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i32 @length5( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: loadbb: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: br i1 [[TMP4]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP12]] +; CHECK-NEXT: br label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP13]], [[LOADBB1]] ], [ [[TMP6]], 
[[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) #0 + ret i32 %m +} + +; Function Attrs: nounwind +define i1 @length5_eq(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @length5_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) #0 + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i1 @length5_lt(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @length5_lt( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: loadbb: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: br i1 [[TMP4]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: 
[[TMP6:%.*]] = select i1 [[TMP5]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP12]] +; CHECK-NEXT: br label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP13]], [[LOADBB1]] ], [ [[TMP6]], [[RES_BLOCK]] ] +; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[PHI_RES]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) #0 + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i1 @length7_eq(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @length7_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: ret i1 [[TMP10]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) #0 + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i32 @length8(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i32 @length8( +; 
CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; CHECK-NEXT: ret i32 [[TMP9]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) #0 + ret i32 %m +} + +; Function Attrs: nounwind +define i1 @length8_eq(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @length8_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) #0 + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i1 @length8_eq_const(ptr %X) #0 { +; CHECK-LABEL: define i1 @length8_eq_const( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 3978425819141910832 +; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) #0 + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i1 @length9_eq(ptr %X, ptr %Y) #0 { 
+; CHECK-LABEL: define i1 @length9_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) #0 + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i1 @length10_eq(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @length10_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; CHECK-NEXT: 
[[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP13]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) #0 + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i1 @length11_eq(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @length11_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) #0 + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i1 @length12_eq(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @length12_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; 
CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: ret i1 [[TMP12]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) #0 + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i32 @length12(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i32 @length12( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: loadbb: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: br i1 [[TMP4]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP2]], [[LOADBB:%.*]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP3]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP9]]) +; CHECK-NEXT: [[TMP12:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = zext i32 [[TMP11]] to i64 +; 
CHECK-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[TMP13]], [[TMP14]] +; CHECK-NEXT: br i1 [[TMP15]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP6]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) #0 + ret i32 %m +} + +; Function Attrs: nounwind +define i1 @length13_eq(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @length13_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 13) #0 + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i1 @length14_eq(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @length14_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 
6 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 14) #0 + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i1 @length15_eq(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @length15_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) #0 + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i32 @length16(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i32 @length16( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: loadbb: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call 
i64 @llvm.bswap.i64(i64 [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: br i1 [[TMP4]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP2]], [[LOADBB:%.*]] ], [ [[TMP11:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP3]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11]] = tail call i64 @llvm.bswap.i64(i64 [[TMP9]]) +; CHECK-NEXT: [[TMP12]] = tail call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[TMP11]], [[TMP12]] +; CHECK-NEXT: br i1 [[TMP13]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP6]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) #0 + ret i32 %m +} + +; Function Attrs: nounwind +define i1 @length16_eq(ptr %x, ptr %y) #0 { +; CHECK-LABEL: define i1 @length16_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: ret i1 [[TMP3]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) #0 + %cmp = icmp ne i32 
%call, 0 + ret i1 %cmp +} + +; Function Attrs: nounwind +define i1 @length16_eq_const(ptr %X) #0 { +; CHECK-LABEL: define i1 @length16_eq_const( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i128 [[TMP1]], 70720121592765328381466889075544961328 +; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) #0 + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i32 @length24(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i32 @length24( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(24) [[X]], ptr noundef nonnull dereferenceable(24) [[Y]], i64 24) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) #0 + ret i32 %m +} + +; Function Attrs: nounwind +define i1 @length24_eq(ptr %x, ptr %y) #0 { +; CHECK-LABEL: define i1 @length24_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext i64 [[TMP6]] to i128 +; CHECK-NEXT: [[TMP9:%.*]] = zext i64 [[TMP7]] to i128 +; CHECK-NEXT: [[TMP10:%.*]] = xor i128 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or i128 [[TMP3]], 
[[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) #0 + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +; Function Attrs: nounwind +define i1 @length24_eq_const(ptr %X) #0 { +; CHECK-LABEL: define i1 @length24_eq_const( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = zext i64 [[TMP4]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], 3689065127958034230 +; CHECK-NEXT: [[TMP7:%.*]] = or i128 [[TMP2]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 +; CHECK-NEXT: ret i1 [[TMP8]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) #0 + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i32 @length32(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i32 @length32( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(32) [[X]], ptr noundef nonnull dereferenceable(32) [[Y]], i64 32) #[[ATTR2]] +; CHECK-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) #0 + ret i32 %m +} + +; Function Attrs: nounwind +define i1 @length32_eq(ptr %x, ptr %y) #0 { +; CHECK-LABEL: define i1 @length32_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 
+; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = xor i128 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 16 +; CHECK-NEXT: [[TMP6:%.*]] = load i128, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = xor i128 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or i128 [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) #0 + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +; Function Attrs: nounwind +define i1 @length32_eq_const(ptr %X) #0 { +; CHECK-LABEL: define i1 @length32_eq_const( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], 70720121592765328381466889075544961328 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[X]], i64 16 +; CHECK-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = xor i128 [[TMP4]], 65382562593882267225249597816672106294 +; CHECK-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; CHECK-NEXT: ret i1 [[TMP7]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) #0 + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i32 @length64(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i32 @length64( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(64) [[X]], ptr noundef 
nonnull dereferenceable(64) [[Y]], i64 64) #[[ATTR2]] +; CHECK-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) #0 + ret i32 %m +} + +; Function Attrs: nounwind +define i1 @length64_eq(ptr %x, ptr %y) #0 { +; CHECK-LABEL: define i1 @length64_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(64) [[X]], ptr noundef nonnull dereferenceable(64) [[Y]], i64 64) #[[ATTR2]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) #0 + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +; Function Attrs: nounwind +define i1 @length64_eq_const(ptr %X) #0 { +; CHECK-LABEL: define i1 @length64_eq_const( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(64) [[X]], ptr noundef nonnull dereferenceable(64) @.str, i64 64) #[[ATTR2]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) #0 + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i32 @huge_length(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i32 @huge_length( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(9223372036854775807) [[X]], ptr noundef nonnull dereferenceable(9223372036854775807) [[Y]], i64 9223372036854775807) #[[ATTR2]] +; CHECK-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) #0 + ret i32 %m +} + +; Function Attrs: nounwind +define i1 @huge_length_eq(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @huge_length_eq( +; CHECK-SAME: ptr 
nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr noundef nonnull dereferenceable(9223372036854775807) [[X]], ptr noundef nonnull dereferenceable(9223372036854775807) [[Y]], i64 9223372036854775807) #[[ATTR2]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) #0 + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) #0 { +; CHECK-LABEL: define i32 @nonconst_length( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR2]] +; CHECK-NEXT: ret i32 [[M]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) #0 + ret i32 %m +} + +; Function Attrs: nounwind +define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) #0 { +; CHECK-LABEL: define i1 @nonconst_length_eq( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[M:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 [[SIZE]]) #[[ATTR2]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) #0 + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +; Function Attrs: nounwind +define i1 @bcmp_length2(ptr %X, ptr %Y) #0 { +; CHECK-LABEL: define i1 @bcmp_length2( +; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP4]], 0 
+; CHECK-NEXT: ret i1 [[C]] +; + %m = tail call i32 @bcmp(ptr %X, ptr %Y, i64 2) #0 + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +attributes #0 = { nounwind } +attributes #1 = { nobuiltin nounwind } diff --git a/llvm/test/tools/llc/new-pm/start-stop.ll b/llvm/test/tools/llc/new-pm/start-stop.ll deleted file mode 100644 index c25e45d1f7ab9..0000000000000 --- a/llvm/test/tools/llc/new-pm/start-stop.ll +++ /dev/null @@ -1,4 +0,0 @@ -; RUN: llc -mtriple=x86_64-pc-linux-gnu -enable-new-pm -print-pipeline-passes -start-before=mergeicmps -stop-after=gc-lowering -filetype=null %s | FileCheck --match-full-lines %s - -; CHECK: IR pipeline: function(mergeicmps,expand-memcmp,gc-lowering) - diff --git a/llvm/tools/opt/optdriver.cpp b/llvm/tools/opt/optdriver.cpp index 85f52941a85b4..b1b49b1e6764c 100644 --- a/llvm/tools/opt/optdriver.cpp +++ b/llvm/tools/opt/optdriver.cpp @@ -424,7 +424,6 @@ extern "C" int optMain( // supported. initializeExpandLargeDivRemLegacyPassPass(Registry); initializeExpandLargeFpConvertLegacyPassPass(Registry); - initializeExpandMemCmpLegacyPassPass(Registry); initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry); initializeSelectOptimizePass(Registry); initializeCallBrPreparePass(Registry); diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn index e78ef13869e64..adeedb42c5b82 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn @@ -55,7 +55,6 @@ static_library("CodeGen") { "ExecutionDomainFix.cpp", "ExpandLargeDivRem.cpp", "ExpandLargeFpConvert.cpp", - "ExpandMemCmp.cpp", "ExpandPostRAPseudos.cpp", "ExpandReductions.cpp", "ExpandVectorPredication.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn index f080c06f10dfe..26d6db7fe4244 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn +++ 
b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn @@ -23,6 +23,7 @@ static_library("Scalar") { "DeadStoreElimination.cpp", "DivRemPairs.cpp", "EarlyCSE.cpp", + "ExpandMemCmp.cpp", "FlattenCFGPass.cpp", "Float2Int.cpp", "GVN.cpp",