diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index b0dd30c13137f..658d44771e8d6 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -51,7 +51,6 @@ FunctionPass *createAArch64A53Fix835769();
 FunctionPass *createFalkorHWPFFixPass();
 FunctionPass *createFalkorMarkStridedAccessesPass();
 FunctionPass *createAArch64BranchTargetsPass();
-FunctionPass *createAArch64MIPeepholeOptPass();
 
 FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
 
@@ -83,7 +82,6 @@ void initializeAArch64SLSHardeningPass(PassRegistry&);
 void initializeAArch64SpeculationHardeningPass(PassRegistry&);
 void initializeAArch64LoadStoreOptPass(PassRegistry&);
 void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
-void initializeAArch64MIPeepholeOptPass(PassRegistry &);
 void initializeAArch64SIMDInstrOptPass(PassRegistry&);
 void initializeAArch64O0PreLegalizerCombinerPass(PassRegistry &);
 void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
deleted file mode 100644
index d5c7791ca02ec..0000000000000
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ /dev/null
@@ -1,220 +0,0 @@
-//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass performs the following peephole optimizations at the MIR level:
-//
-// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
-//    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
-//
-// The mov pseudo instruction could be expanded to multiple mov instructions
-// later. In this case, we could try to split the constant operand of the mov
-// instruction into two bitmask immediates. It makes two AND instructions
-// instead of multiple `mov` + `and` instructions.
-//===----------------------------------------------------------------------===//
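For a concrete feel for the splitting described above, here is a minimal standalone sketch — not part of this patch, with illustrative names and plain compiler builtins standing in for LLVM's countTrailingZeros/Log2_64 — using the constant 2098176 (0x200400) that also appears in the tests removed below:

```cpp
// Standalone illustration (assumed example, not the pass itself): split a
// non-encodable AND mask into two AArch64 logical immediates.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Imm = 0x200400; // 2098176: bits 10 and 21, not one run of ones
  const unsigned Lo = __builtin_ctz(Imm);      // lowest set bit  -> 10
  const unsigned Hi = 31 - __builtin_clz(Imm); // highest set bit -> 21

  // Mask1: ones from bit Lo through bit Hi (a single run of ones).
  const uint32_t Mask1 = (2u << Hi) - (1u << Lo); // 0x003FFC00
  // Mask2: ones everywhere except the zero bits of Imm inside that run
  // (a rotated run of ones).
  const uint32_t Mask2 = Imm | ~Mask1;            // 0xFFE007FF

  assert((Mask1 & Mask2) == Imm); // two ANDs reproduce the original constant
  const uint32_t X = 0xDEADBEEF;
  assert(((X & Mask1) & Mask2) == (X & Imm));
  return 0;
}
```

Both resulting masks are runs of consecutive ones (one of them rotated), so each can be encoded as an AArch64 logical immediate, which is what the splitBitmaskImm helper below verifies with isLogicalImmediate before committing to the split.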
-
-#include "AArch64ExpandImm.h"
-#include "AArch64InstrInfo.h"
-#include "MCTargetDesc/AArch64AddressingModes.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "aarch64-mi-peephole-opt"
-
-namespace {
-
-struct AArch64MIPeepholeOpt : public MachineFunctionPass {
-  static char ID;
-
-  AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
-    initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
-  }
-
-  const AArch64InstrInfo *TII;
-  MachineLoopInfo *MLI;
-  MachineRegisterInfo *MRI;
-
-  template <typename T>
-  bool visitAND(MachineInstr &MI,
-                SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
-  bool runOnMachineFunction(MachineFunction &MF) override;
-
-  StringRef getPassName() const override {
-    return "AArch64 MI Peephole Optimization pass";
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    AU.addRequired<MachineLoopInfo>();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-};
-
-char AArch64MIPeepholeOpt::ID = 0;
-
-} // end anonymous namespace
-
-INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
-                "AArch64 MI Peephole Optimization", false, false)
-
-template <typename T>
-static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
-  T UImm = static_cast<T>(Imm);
-  if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
-    return false;
-
-  // If this immediate can be handled by one instruction, do not split it.
-  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
-  AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
-  if (Insn.size() == 1)
-    return false;
-
-  // A bitmask immediate consists of consecutive ones. Let's say there is a
-  // constant 0b00000000001000000000010000000000 which does not consist of
-  // consecutive ones. We can split it into two bitmask immediates like
-  // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
-  // If we AND with these two bitmask immediates, we get the original constant.
-  unsigned LowestBitSet = countTrailingZeros(UImm);
-  unsigned HighestBitSet = Log2_64(UImm);
-
-  // Create a mask which is filled with ones from the position of the lowest
-  // bit set to the position of the highest bit set.
-  T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
-              (static_cast<T>(1) << LowestBitSet);
-  // Create a mask which is filled with ones outside the positions of the
-  // lowest bit set and the highest bit set.
-  T NewImm2 = UImm | ~NewImm1;
-
-  // If the split value is not a valid bitmask immediate, do not split this
-  // constant.
-  if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
-    return false;
-
-  Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
-  Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
-  return true;
-}
-
-template <typename T>
-bool AArch64MIPeepholeOpt::visitAND(
-    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
-  // Try the transformation below.
-  //
-  // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
-  // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
-  //
-  // The mov pseudo instruction could be expanded to multiple mov instructions
-  // later. Let's try to split the constant operand of the mov instruction into
-  // two bitmask immediates. It makes only two AND instructions instead of
-  // multiple mov + and instructions.
-
-  unsigned RegSize = sizeof(T) * 8;
-  assert((RegSize == 32 || RegSize == 64) &&
-         "Invalid RegSize for AND bitmask peephole optimization");
-
-  // Check whether the AND's MBB is in a loop and the AND is loop invariant.
-  MachineBasicBlock *MBB = MI.getParent();
-  MachineLoop *L = MLI->getLoopFor(MBB);
-  if (L && !L->isLoopInvariant(MI))
-    return false;
-
-  // Check whether the AND's operand is a MOV with an immediate.
-  MachineInstr *MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
-  MachineInstr *SubregToRegMI = nullptr;
-  // If it is SUBREG_TO_REG, check its operand.
-  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
-    SubregToRegMI = MovMI;
-    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
-  }
-
-  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
-      MovMI->getOpcode() != AArch64::MOVi64imm)
-    return false;
-
-  // If the MOV has multiple uses, do not split the immediate because it causes
-  // more instructions.
-  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
-    return false;
-
-  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
-    return false;
-
-  // Split the bitmask immediate into two.
-  T UImm = static_cast<T>(MovMI->getOperand(1).getImm());
-  T Imm1Enc;
-  T Imm2Enc;
-  if (!splitBitmaskImm(UImm, RegSize, Imm1Enc, Imm2Enc))
-    return false;
-
-  // Create new AND MIs.
-  DebugLoc DL = MI.getDebugLoc();
-  const TargetRegisterClass *ANDImmRC =
-      (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
-  Register DstReg = MI.getOperand(0).getReg();
-  Register SrcReg = MI.getOperand(1).getReg();
-  Register NewTmpReg = MRI->createVirtualRegister(ANDImmRC);
-  unsigned Opcode = (RegSize == 32) ? AArch64::ANDWri : AArch64::ANDXri;
-
-  MRI->constrainRegClass(NewTmpReg, MRI->getRegClass(SrcReg));
-  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
-      .addReg(SrcReg)
-      .addImm(Imm1Enc);
-
-  MRI->constrainRegClass(DstReg, ANDImmRC);
-  BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
-      .addReg(NewTmpReg)
-      .addImm(Imm2Enc);
-
-  ToBeRemoved.insert(&MI);
-  if (SubregToRegMI)
-    ToBeRemoved.insert(SubregToRegMI);
-  ToBeRemoved.insert(MovMI);
-
-  return true;
-}
-
-bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(MF.getFunction()))
-    return false;
-
-  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
-  MLI = &getAnalysis<MachineLoopInfo>();
-  MRI = &MF.getRegInfo();
-
-  if (!MRI->isSSA())
-    return false;
-
-  bool Changed = false;
-  SmallSetVector<MachineInstr *, 8> ToBeRemoved;
-
-  for (MachineBasicBlock &MBB : MF) {
-    for (MachineInstr &MI : MBB) {
-      switch (MI.getOpcode()) {
-      default:
-        break;
-      case AArch64::ANDWrr:
-        Changed = visitAND<uint32_t>(MI, ToBeRemoved);
-        break;
-      case AArch64::ANDXrr:
-        Changed = visitAND<uint64_t>(MI, ToBeRemoved);
-        break;
-      }
-    }
-  }
-
-  for (MachineInstr *MI : ToBeRemoved)
-    MI->eraseFromParent();
-
-  return Changed;
-}
-
-FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
-  return new AArch64MIPeepholeOpt();
-}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 5d26b6d41b4c5..6127f890118f9 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -195,7 +195,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
   initializeAArch64DeadRegisterDefinitionsPass(*PR);
   initializeAArch64ExpandPseudoPass(*PR);
   initializeAArch64LoadStoreOptPass(*PR);
-  initializeAArch64MIPeepholeOptPass(*PR);
   initializeAArch64SIMDInstrOptPass(*PR);
   initializeAArch64O0PreLegalizerCombinerPass(*PR);
   initializeAArch64PreLegalizerCombinerPass(*PR);
@@ -481,7 +480,6 @@ class AArch64PassConfig : public TargetPassConfig {
   bool addRegBankSelect() override;
   void addPreGlobalInstructionSelect() override;
   bool addGlobalInstructionSelect() override;
-  void addMachineSSAOptimization() override;
   bool addILPOpts() override;
   void addPreRegAlloc() override;
   void addPostRegAlloc() override;
@@ -658,14 +656,6 @@ bool AArch64PassConfig::addGlobalInstructionSelect() {
   return false;
 }
 
-void AArch64PassConfig::addMachineSSAOptimization() {
-  // Run default MachineSSAOptimization first.
-  TargetPassConfig::addMachineSSAOptimization();
-
-  if (TM->getOptLevel() != CodeGenOpt::None)
-    addPass(createAArch64MIPeepholeOptPass());
-}
-
 bool AArch64PassConfig::addILPOpts() {
   if (EnableCondOpt)
     addPass(createAArch64ConditionOptimizerPass());
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index aeedeb4eebac8..a77a66bacc4c2 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -66,7 +66,6 @@ add_llvm_target(AArch64CodeGen
   AArch64LowerHomogeneousPrologEpilog.cpp
   AArch64MachineFunctionInfo.cpp
   AArch64MacroFusion.cpp
-  AArch64MIPeepholeOpt.cpp
   AArch64MCInstLower.cpp
   AArch64PromoteConstant.cpp
   AArch64PBQPRegAlloc.cpp
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index 8765260935910..c3e74757675b6 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -13,7 +13,6 @@
 #ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ADDRESSINGMODES_H
 #define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ADDRESSINGMODES_H
 
-#include "AArch64ExpandImm.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/bit.h"
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 94c61d66a20d4..95816bd9d3262 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -40,7 +40,7 @@
 ; CHECK-NEXT:       Induction Variable Users
 ; CHECK-NEXT:       Loop Strength Reduction
 ; CHECK-NEXT:       Basic Alias Analysis (stateless AA impl)
-; CHECK-NEXT:       Function Alias Analysis Results
+; CHECK-NEXT:       Function Alias Analysis Results
 ; CHECK-NEXT:       Merge contiguous icmps into a memcmp
 ; CHECK-NEXT:       Natural Loop Information
 ; CHECK-NEXT:       Lazy Branch Probability Analysis
@@ -132,7 +132,6 @@
 ; CHECK-NEXT:       Machine code sinking
 ; CHECK-NEXT:       Peephole Optimizations
 ; CHECK-NEXT:       Remove dead machine instructions
-; CHECK-NEXT:       AArch64 MI Peephole Optimization pass
 ; CHECK-NEXT:       AArch64 Dead register definitions
 ; CHECK-NEXT:       Detect Dead Lanes
 ; CHECK-NEXT:       Process Implicit Definitions
diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
deleted file mode 100644
index 9f6d9f88e73e4..0000000000000
--- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
+++ /dev/null
@@ -1,245 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
-
-define i8 @test1(i32 %a) {
-; CHECK-LABEL: test1:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    and w8, w0, #0x3ffc00
-; CHECK-NEXT:    and w8, w8, #0xffe007ff
-; CHECK-NEXT:    cmp w8, #1024
-; CHECK-NEXT:    cset w0, eq
-; CHECK-NEXT:    ret
-entry:
-  %and = and i32 %a, 2098176
-  %cmp = icmp eq i32 %and, 1024
-  %conv = zext i1 %cmp to i8
-  ret i8 %conv
-}
-
-; This constant should not be split because it can be handled by one mov.
-define i8 @test2(i32 %a) {
-; CHECK-LABEL: test2:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #135
-; CHECK-NEXT:    and w8, w0, w8
-; CHECK-NEXT:    cmp w8, #1024
-; CHECK-NEXT:    cset w0, eq
-; CHECK-NEXT:    ret
-entry:
-  %and = and i32 %a, 135
-  %cmp = icmp eq i32 %and, 1024
-  %conv = zext i1 %cmp to i8
-  ret i8 %conv
-}
-
-; This constant should not be split because the split immediate is not a valid
-; bitmask immediate.
-define i8 @test3(i32 %a) {
-; CHECK-LABEL: test3:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #1024
-; CHECK-NEXT:    movk w8, #33, lsl #16
-; CHECK-NEXT:    and w8, w0, w8
-; CHECK-NEXT:    cmp w8, #1024
-; CHECK-NEXT:    cset w0, eq
-; CHECK-NEXT:    ret
-entry:
-  %and = and i32 %a, 2163712
-  %cmp = icmp eq i32 %and, 1024
-  %conv = zext i1 %cmp to i8
-  ret i8 %conv
-}
-
-define i8 @test4(i64 %a) {
-; CHECK-LABEL: test4:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    and x8, x0, #0x3ffc00
-; CHECK-NEXT:    and x8, x8, #0xffffffffffe007ff
-; CHECK-NEXT:    cmp x8, #1024
-; CHECK-NEXT:    cset w0, eq
-; CHECK-NEXT:    ret
-entry:
-  %and = and i64 %a, 2098176
-  %cmp = icmp eq i64 %and, 1024
-  %conv = zext i1 %cmp to i8
-  ret i8 %conv
-}
-
-define i8 @test5(i64 %a) {
-; CHECK-LABEL: test5:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    and x8, x0, #0x3ffffc000
-; CHECK-NEXT:    and x8, x8, #0xfffffffe00007fff
-; CHECK-NEXT:    cmp x8, #1024
-; CHECK-NEXT:    cset w0, eq
-; CHECK-NEXT:    ret
-entry:
-  %and = and i64 %a, 8589950976
-  %cmp = icmp eq i64 %and, 1024
-  %conv = zext i1 %cmp to i8
-  ret i8 %conv
-}
-
-; This constant should not be split because it can be handled by one mov.
-define i8 @test6(i64 %a) {
-; CHECK-LABEL: test6:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #135
-; CHECK-NEXT:    and x8, x0, x8
-; CHECK-NEXT:    cmp x8, #1024
-; CHECK-NEXT:    cset w0, eq
-; CHECK-NEXT:    ret
-entry:
-  %and = and i64 %a, 135
-  %cmp = icmp eq i64 %and, 1024
-  %conv = zext i1 %cmp to i8
-  ret i8 %conv
-}
-
-; This constant should not be split because the split immediate is not a valid
-; bitmask immediate.
-define i8 @test7(i64 %a) {
-; CHECK-LABEL: test7:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #1024
-; CHECK-NEXT:    movk w8, #33, lsl #16
-; CHECK-NEXT:    and x8, x0, x8
-; CHECK-NEXT:    cmp x8, #1024
-; CHECK-NEXT:    cset w0, eq
-; CHECK-NEXT:    ret
-entry:
-  %and = and i64 %a, 2163712
-  %cmp = icmp eq i64 %and, 1024
-  %conv = zext i1 %cmp to i8
-  ret i8 %conv
-}
-
-; The split bitmask immediates should be hoisted outside the loop because they
-; are loop invariant.
-define void @test8(i64 %a, i64* noalias %src, i64* noalias %dst, i64 %n) {
-; CHECK-LABEL: test8:
-; CHECK:       // %bb.0: // %loop.ph
-; CHECK-NEXT:    and x9, x0, #0x3ffc00
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    and x9, x9, #0xffffffffffe007ff
-; CHECK-NEXT:    b .LBB7_2
-; CHECK-NEXT:  .LBB7_1: // %for.inc
-; CHECK-NEXT:    // in Loop: Header=BB7_2 Depth=1
-; CHECK-NEXT:    add x8, x8, #1
-; CHECK-NEXT:    cmp x8, x3
-; CHECK-NEXT:    b.gt .LBB7_4
-; CHECK-NEXT:  .LBB7_2: // %loop
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.hs .LBB7_1
-; CHECK-NEXT:  // %bb.3: // %if.then
-; CHECK-NEXT:    // in Loop: Header=BB7_2 Depth=1
-; CHECK-NEXT:    lsl x10, x8, #3
-; CHECK-NEXT:    ldr x11, [x1, x10]
-; CHECK-NEXT:    str x11, [x2, x10]
-; CHECK-NEXT:    b .LBB7_1
-; CHECK-NEXT:  .LBB7_4: // %exit
-; CHECK-NEXT:    ret
-loop.ph:
-  br label %loop
-
-loop:
-  %iv = phi i64 [ %inc, %for.inc ], [ 0, %loop.ph ]
-  %and = and i64 %a, 2098176
-  %cmp = icmp ult i64 %iv, %and
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:
-  %src.arrayidx = getelementptr inbounds i64, i64* %src, i64 %iv
-  %val = load i64, i64* %src.arrayidx
-  %dst.arrayidx = getelementptr inbounds i64, i64* %dst, i64 %iv
-  store i64 %val, i64* %dst.arrayidx
-  br label %for.inc
-
-if.else:
-  br label %for.inc
-
-for.inc:
-  %inc = add nuw nsw i64 %iv, 1
-  %cond = icmp sgt i64 %inc, %n
-  br i1 %cond, label %exit, label %loop
-
-exit:
-  ret void
-}
-
-; This constant should not be split because the `and` is not loop invariant.
-define i32 @test9(i32* nocapture %x, i32* nocapture readonly %y, i32 %n) {
-; CHECK-LABEL: test9:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cmp w2, #1
-; CHECK-NEXT:    b.lt .LBB8_3
-; CHECK-NEXT:  // %bb.1: // %for.body.preheader
-; CHECK-NEXT:    mov w9, #1024
-; CHECK-NEXT:    mov w8, w2
-; CHECK-NEXT:    movk w9, #32, lsl #16
-; CHECK-NEXT:  .LBB8_2: // %for.body
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldr w10, [x1], #4
-; CHECK-NEXT:    subs x8, x8, #1
-; CHECK-NEXT:    and w10, w10, w9
-; CHECK-NEXT:    str w10, [x0], #4
-; CHECK-NEXT:    b.ne .LBB8_2
-; CHECK-NEXT:  .LBB8_3: // %for.cond.cleanup
-; CHECK-NEXT:    mov w0, wzr
-; CHECK-NEXT:    ret
-entry:
-  %cmp8 = icmp sgt i32 %n, 0
-  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup
-
-for.body.preheader:                               ; preds = %entry
-  %wide.trip.count = zext i32 %n to i64
-  br label %for.body
-
-for.cond.cleanup:                                 ; preds = %for.body, %entry
-  ret i32 0
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %and = and i32 %0, 2098176
-  %arrayidx2 = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
-  store i32 %and, i32* %arrayidx2, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
-  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
-}
-
-; After instruction selection ends, we can see that the `and` and `or` share
-; the constant, as below.
-;
-;   %4:gpr32 = MOVi32imm 2098176
-;   %5:gpr32 = ANDWrr killed %3:gpr32, %4:gpr32
-;   STRWui killed %5:gpr32, %0:gpr64common, 0 :: (store (s32) into %ir.x, !tbaa !8)
-;   %6:gpr32 = LDRWui %1:gpr64common, 0 :: (load (s32) from %ir.y, !tbaa !8)
-;   %7:gpr32 = ORRWrr killed %6:gpr32, %4:gpr32
-;
-; In this case, the constant should not be split because it causes more
-; instructions.
-define void @test10(i32* nocapture %x, i32* nocapture readonly %y, i32* nocapture %z) {
-; CHECK-LABEL: test10:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldr w8, [x1]
-; CHECK-NEXT:    mov w9, #1024
-; CHECK-NEXT:    movk w9, #32, lsl #16
-; CHECK-NEXT:    and w8, w8, w9
-; CHECK-NEXT:    str w8, [x0]
-; CHECK-NEXT:    ldr w8, [x1]
-; CHECK-NEXT:    orr w8, w8, w9
-; CHECK-NEXT:    str w8, [x2]
-; CHECK-NEXT:    ret
-entry:
-  %0 = load i32, i32* %y, align 4
-  %and = and i32 %0, 2098176
-  store i32 %and, i32* %x, align 4
-  %1 = load i32, i32* %y, align 4
-  %or = or i32 %1, 2098176
-  store i32 %or, i32* %z, align 4
-  ret void
-}
diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
index 3e30f45cfabb3..2e20ef67b2a2d 100644
--- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
+++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
@@ -245,9 +245,10 @@ define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind {
 define i32 @n0_badconstmask(i32 %x, i32 %y) {
 ; CHECK-LABEL: n0_badconstmask:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w9, w1, #0xffffff00
+; CHECK-NEXT:    mov w9, #256
+; CHECK-NEXT:    movk w9, #65280, lsl #16
 ; CHECK-NEXT:    and w8, w0, #0xffff00
-; CHECK-NEXT:    and w9, w9, #0xff0001ff
+; CHECK-NEXT:    and w9, w1, w9
 ; CHECK-NEXT:    orr w0, w8, w9
 ; CHECK-NEXT:    ret
   %mx = and i32 %x, 16776960