diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 6a02c6d538886..e31afe7a0f4af 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -215,14 +215,6 @@ def unmerge_ext_to_unmerge : GICombineRule<
   (apply [{ applyUnmergeExtToUnmerge(*${d}, MRI, B, Observer, ${matchinfo}); }])
 >;
 
-def regtriple_matchdata : GIDefMatchData<"std::tuple<Register, Register, Register>">;
-def or_to_bsp: GICombineRule <
-  (defs root:$root, regtriple_matchdata:$matchinfo),
-  (match (wip_match_opcode G_OR):$root,
-         [{ return matchOrToBSP(*${root}, MRI, ${matchinfo}); }]),
-  (apply [{ applyOrToBSP(*${root}, MRI, B, ${matchinfo}); }])
->;
-
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
@@ -250,5 +242,5 @@ def AArch64PostLegalizerCombiner
                         constant_fold_binops, identity_combines,
                         ptr_add_immed_chain, overlapping_and,
                         split_store_zero_128, undef_combines,
-                        select_to_minmax, or_to_bsp]> {
+                        select_to_minmax]> {
 }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 0b3509cf02d68..f9f860607b587 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -215,9 +215,8 @@ def G_PREFETCH : AArch64GenericInstruction {
   let hasSideEffects = 1;
 }
 
-// Generic instruction for the BSP pseudo. It is expanded into BSP, which
-// expands into BSL/BIT/BIF after register allocation.
-def G_BSP : AArch64GenericInstruction {
+// Generic bitwise insert if true.
+def G_BIT : AArch64GenericInstruction {
   let OutOperandList = (outs type0:$dst);
   let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
   let hasSideEffects = 0;
@@ -253,7 +252,7 @@ def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
-def : GINodeEquiv<G_BSP, AArch64bsp>;
+def : GINodeEquiv<G_BIT, AArch64bit>;
 def : GINodeEquiv;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 9ea4e349e69b9..f1dfddf9b4404 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1790,7 +1790,7 @@ bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
   if (DstSize == 64)
     Mask = MIRBuilder.buildFNeg(VecTy, Mask);
 
-  auto Sel = MIRBuilder.buildInstr(AArch64::G_BSP, {VecTy}, {Mask, Ins1, Ins2});
+  auto Sel = MIRBuilder.buildInstr(AArch64::G_BIT, {VecTy}, {Ins1, Ins2, Mask});
 
   // Build an unmerge whose 0th elt is the original G_FCOPYSIGN destination. We
   // want this to eventually become an EXTRACT_SUBREG.
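Note on the hunks above: G_BSP and G_BIT are both three-source vector bitwise selects, but G_BSP took the select mask as its first source while G_BIT takes it last, which is why the legalizeFCopySign change reorders {Mask, Ins1, Ins2} into {Ins1, Ins2, Mask}. A minimal IR reproducer for this legalization path, as a sketch assuming an invocation like llc -mtriple=aarch64 -global-isel (the function name is illustrative):

  define float @copysign_f32(float %val, float %sign) {
    ; Legalized as in legalize-fcopysign.mir below: the scalars are inserted
    ; into <4 x s32> vectors, a sign-bit mask (i32 -2147483648) is built, and
    ; the result is selected with the three-source bitwise-insert operation.
    %r = call float @llvm.copysign.f32(float %val, float %sign)
    ret float %r
  }
  declare float @llvm.copysign.f32(float, float)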
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 91c261888df98..5e248f568effc 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -338,47 +338,6 @@ void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
   Store.eraseFromParent();
 }
 
-bool matchOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
-                  std::tuple<Register, Register, Register> &MatchInfo) {
-  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
-  if (!DstTy.isVector())
-    return false;
-
-  Register AO1, AO2, BVO1, BVO2;
-  if (!mi_match(
-          MI, MRI,
-          m_GOr(m_GAnd(m_Reg(AO1), m_Reg(BVO1)), m_GAnd(m_Reg(AO2), m_Reg(BVO2)))))
-    return false;
-
-  auto *BV1 = getOpcodeDef<GBuildVector>(BVO1, MRI);
-  auto *BV2 = getOpcodeDef<GBuildVector>(BVO2, MRI);
-  if (!BV1 || !BV2)
-    return false;
-
-  for (int I = 0, E = DstTy.getNumElements(); I < E; I++) {
-    auto ValAndVReg1 =
-        getIConstantVRegValWithLookThrough(BV1->getSourceReg(I), MRI);
-    auto ValAndVReg2 =
-        getIConstantVRegValWithLookThrough(BV2->getSourceReg(I), MRI);
-    if (!ValAndVReg1 || !ValAndVReg2 ||
-        ValAndVReg1->Value != ~ValAndVReg2->Value)
-      return false;
-  }
-
-  MatchInfo = {AO1, AO2, BVO2};
-  return true;
-}
-
-void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
-                  MachineIRBuilder &B,
-                  std::tuple<Register, Register, Register> &MatchInfo) {
-  B.setInstrAndDebugLoc(MI);
-  B.buildInstr(
-      AArch64::G_BSP, {MI.getOperand(0).getReg()},
-      {std::get<2>(MatchInfo), std::get<0>(MatchInfo), std::get<1>(MatchInfo)});
-  MI.eraseFromParent();
-}
-
 class AArch64PostLegalizerCombinerImpl : public Combiner {
 protected:
   // TODO: Make CombinerHelper methods const.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir
index cae2c06e44c5a..912daad7d60b0 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir
@@ -10,19 +10,18 @@ body: |
     liveins: $s0, $s1
     ; CHECK-LABEL: name: legalize_s32
     ; CHECK: liveins: $s0, $s1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: %val:_(s32) = COPY $s0
-    ; CHECK-NEXT: %sign:_(s32) = COPY $s1
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s32), [[C]](s32)
-    ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s32), [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
-    ; CHECK-NEXT: [[BSP:%[0-9]+]]:_(<4 x s32>) = G_BSP [[BUILD_VECTOR]], [[IVEC]], [[IVEC1]]
-    ; CHECK-NEXT: %fcopysign:_(s32), %10:_(s32), %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES [[BSP]](<4 x s32>)
-    ; CHECK-NEXT: $s0 = COPY %fcopysign(s32)
-    ; CHECK-NEXT: RET_ReallyLR implicit $s0
+    ; CHECK: %val:_(s32) = COPY $s0
+    ; CHECK: %sign:_(s32) = COPY $s1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s32), [[C]](s32)
+    ; CHECK: [[IVEC1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s32), [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+    ; CHECK: [[BIT:%[0-9]+]]:_(<4 x s32>) = G_BIT [[IVEC]], [[IVEC1]], [[BUILD_VECTOR]]
+    ; CHECK: %fcopysign:_(s32), %10:_(s32), %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES [[BIT]](<4 x s32>)
+    ; CHECK: $s0 = COPY %fcopysign(s32)
+    ; CHECK: RET_ReallyLR implicit $s0
     %val:_(s32) = COPY $s0
     %sign:_(s32) = COPY $s1
     %fcopysign:_(s32) = G_FCOPYSIGN %val, %sign(s32)
@@ -38,19 +37,18 @@ body: |
    liveins: $d0, $d1
     ; CHECK-LABEL: name: legalize_s64
     ; CHECK: liveins: $d0, $d1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: %val:_(s64) = COPY $d0
-    ; CHECK-NEXT: %sign:_(s64) = COPY $d1
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s64), [[C]](s64)
-    ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s64), [[C]](s64)
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
-    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG [[BUILD_VECTOR]]
-    ; CHECK-NEXT: [[BSP:%[0-9]+]]:_(<2 x s64>) = G_BSP [[FNEG]], [[IVEC]], [[IVEC1]]
-    ; CHECK-NEXT: %fcopysign:_(s64), %10:_(s64) = G_UNMERGE_VALUES [[BSP]](<2 x s64>)
-    ; CHECK-NEXT: $d0 = COPY %fcopysign(s64)
-    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    ; CHECK: %val:_(s64) = COPY $d0
+    ; CHECK: %sign:_(s64) = COPY $d1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s64), [[C]](s64)
+    ; CHECK: [[IVEC1:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s64), [[C]](s64)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
+    ; CHECK: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG [[BUILD_VECTOR]]
+    ; CHECK: [[BIT:%[0-9]+]]:_(<2 x s64>) = G_BIT [[IVEC]], [[IVEC1]], [[FNEG]]
+    ; CHECK: %fcopysign:_(s64), %10:_(s64) = G_UNMERGE_VALUES [[BIT]](<2 x s64>)
+    ; CHECK: $d0 = COPY %fcopysign(s64)
+    ; CHECK: RET_ReallyLR implicit $d0
     %val:_(s64) = COPY $d0
     %sign:_(s64) = COPY $d1
     %fcopysign:_(s64) = G_FCOPYSIGN %val, %sign(s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-bit.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-bit.mir
index e3edb62809774..843810619c5c5 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-bit.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-bit.mir
@@ -13,17 +13,16 @@ body: |
     ; CHECK-LABEL: name: BITv8i8_v2s32
     ; CHECK: liveins: $d0, $d1, $d2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: %lhs:fpr64 = COPY $d0
-    ; CHECK-NEXT: %mhs:fpr64 = COPY $d1
-    ; CHECK-NEXT: %rhs:fpr64 = COPY $d2
-    ; CHECK-NEXT: %bit:fpr64 = BSPv8i8 %lhs, %mhs, %rhs
-    ; CHECK-NEXT: $d0 = COPY %bit
-    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    ; CHECK: %lhs:fpr64 = COPY $d0
+    ; CHECK: %mhs:fpr64 = COPY $d1
+    ; CHECK: %rhs:fpr64 = COPY $d2
+    ; CHECK: %bit:fpr64 = BITv8i8 %lhs, %mhs, %rhs
+    ; CHECK: $d0 = COPY %bit
+    ; CHECK: RET_ReallyLR implicit $d0
     %lhs:fpr(<2 x s32>) = COPY $d0
     %mhs:fpr(<2 x s32>) = COPY $d1
     %rhs:fpr(<2 x s32>) = COPY $d2
-    %bit:fpr(<2 x s32>) = G_BSP %lhs, %mhs, %rhs
+    %bit:fpr(<2 x s32>) = G_BIT %lhs, %mhs, %rhs
     $d0 = COPY %bit(<2 x s32>)
     RET_ReallyLR implicit $d0
@@ -38,17 +37,16 @@ body: |
     liveins: $d0, $d1, $d2
     ; CHECK-LABEL: name: BITv8i8_v4s16
     ; CHECK: liveins: $d0, $d1, $d2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: %lhs:fpr64 = COPY $d0
-    ; CHECK-NEXT: %mhs:fpr64 = COPY $d1
-    ; CHECK-NEXT: %rhs:fpr64 = COPY $d2
-    ; CHECK-NEXT: %bit:fpr64 = BSPv8i8 %lhs, %mhs, %rhs
-    ; CHECK-NEXT: $d0 = COPY %bit
-    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    ; CHECK: %lhs:fpr64 = COPY $d0
+    ; CHECK: %mhs:fpr64 = COPY $d1
+    ; CHECK: %rhs:fpr64 = COPY $d2
+    ; CHECK: %bit:fpr64 = BITv8i8 %lhs, %mhs, %rhs
+    ; CHECK: $d0 = COPY %bit
+    ; CHECK: RET_ReallyLR implicit $d0
     %lhs:fpr(<4 x s16>) = COPY $d0
     %mhs:fpr(<4 x s16>) = COPY $d1
     %rhs:fpr(<4 x s16>) = COPY $d2
-    %bit:fpr(<4 x s16>) = G_BSP %lhs, %mhs, %rhs
+    %bit:fpr(<4 x s16>) = G_BIT %lhs, %mhs, %rhs
     $d0 = COPY %bit(<4 x s16>)
     RET_ReallyLR implicit $d0
@@ -64,17 +62,16 @@ body: |
     ; CHECK-LABEL: name: BITv16i8_v2s64
     ; CHECK: liveins: $q0, $q1, $q2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: %lhs:fpr128 = COPY $q0
-    ; CHECK-NEXT: %mhs:fpr128 = COPY $q1
-    ; CHECK-NEXT: %rhs:fpr128 = COPY $q2
-    ; CHECK-NEXT: %bit:fpr128 = BSPv16i8 %lhs, %mhs, %rhs
-    ; CHECK-NEXT: $q0 = COPY %bit
-    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    ; CHECK: %lhs:fpr128 = COPY $q0
+    ; CHECK: %mhs:fpr128 = COPY $q1
+    ; CHECK: %rhs:fpr128 = COPY $q2
+    ; CHECK: %bit:fpr128 = BITv16i8 %lhs, %mhs, %rhs
+    ; CHECK: $q0 = COPY %bit
+    ; CHECK: RET_ReallyLR implicit $q0
     %lhs:fpr(<2 x s64>) = COPY $q0
     %mhs:fpr(<2 x s64>) = COPY $q1
     %rhs:fpr(<2 x s64>) = COPY $q2
-    %bit:fpr(<2 x s64>) = G_BSP %lhs, %mhs, %rhs
+    %bit:fpr(<2 x s64>) = G_BIT %lhs, %mhs, %rhs
     $q0 = COPY %bit(<2 x s64>)
     RET_ReallyLR implicit $q0
@@ -90,17 +87,16 @@ body: |
     ; CHECK-LABEL: name: BITv16i8_v4s32
     ; CHECK: liveins: $q0, $q1, $q2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: %lhs:fpr128 = COPY $q0
-    ; CHECK-NEXT: %mhs:fpr128 = COPY $q1
-    ; CHECK-NEXT: %rhs:fpr128 = COPY $q2
-    ; CHECK-NEXT: %bit:fpr128 = BSPv16i8 %lhs, %mhs, %rhs
-    ; CHECK-NEXT: $q0 = COPY %bit
-    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    ; CHECK: %lhs:fpr128 = COPY $q0
+    ; CHECK: %mhs:fpr128 = COPY $q1
+    ; CHECK: %rhs:fpr128 = COPY $q2
+    ; CHECK: %bit:fpr128 = BITv16i8 %lhs, %mhs, %rhs
+    ; CHECK: $q0 = COPY %bit
+    ; CHECK: RET_ReallyLR implicit $q0
     %lhs:fpr(<4 x s32>) = COPY $q0
     %mhs:fpr(<4 x s32>) = COPY $q1
     %rhs:fpr(<4 x s32>) = COPY $q2
-    %bit:fpr(<4 x s32>) = G_BSP %lhs, %mhs, %rhs
+    %bit:fpr(<4 x s32>) = G_BIT %lhs, %mhs, %rhs
     $q0 = COPY %bit(<4 x s32>)
     RET_ReallyLR implicit $q0
@@ -116,17 +112,16 @@ body: |
     ; CHECK-LABEL: name: BITv16i8_v8s16
     ; CHECK: liveins: $q0, $q1, $q2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: %lhs:fpr128 = COPY $q0
-    ; CHECK-NEXT: %mhs:fpr128 = COPY $q1
-    ; CHECK-NEXT: %rhs:fpr128 = COPY $q2
-    ; CHECK-NEXT: %bit:fpr128 = BSPv16i8 %lhs, %mhs, %rhs
-    ; CHECK-NEXT: $q0 = COPY %bit
-    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    ; CHECK: %lhs:fpr128 = COPY $q0
+    ; CHECK: %mhs:fpr128 = COPY $q1
+    ; CHECK: %rhs:fpr128 = COPY $q2
+    ; CHECK: %bit:fpr128 = BITv16i8 %lhs, %mhs, %rhs
+    ; CHECK: $q0 = COPY %bit
+    ; CHECK: RET_ReallyLR implicit $q0
     %lhs:fpr(<8 x s16>) = COPY $q0
     %mhs:fpr(<8 x s16>) = COPY $q1
     %rhs:fpr(<8 x s16>) = COPY $q2
-    %bit:fpr(<8 x s16>) = G_BSP %lhs, %mhs, %rhs
+    %bit:fpr(<8 x s16>) = G_BIT %lhs, %mhs, %rhs
     $q0 = COPY %bit(<8 x s16>)
     RET_ReallyLR implicit $q0
@@ -142,16 +137,15 @@ body: |
     ; CHECK-LABEL: name: BITv16i8_v16s8
     ; CHECK: liveins: $q0, $q1, $q2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: %lhs:fpr128 = COPY $q0
-    ; CHECK-NEXT: %mhs:fpr128 = COPY $q1
-    ; CHECK-NEXT: %rhs:fpr128 = COPY $q2
-    ; CHECK-NEXT: %bit:fpr128 = BSPv16i8 %lhs, %mhs, %rhs
-    ; CHECK-NEXT: $q0 = COPY %bit
-    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    ; CHECK: %lhs:fpr128 = COPY $q0
+    ; CHECK: %mhs:fpr128 = COPY $q1
+    ; CHECK: %rhs:fpr128 = COPY $q2
+    ; CHECK: %bit:fpr128 = BITv16i8 %lhs, %mhs, %rhs
+    ; CHECK: $q0 = COPY %bit
+    ; CHECK: RET_ReallyLR implicit $q0
     %lhs:fpr(<16 x s8>) = COPY $q0
     %mhs:fpr(<16 x s8>) = COPY $q1
     %rhs:fpr(<16 x s8>) = COPY $q2
-    %bit:fpr(<16 x s8>) = G_BSP %lhs, %mhs, %rhs
+    %bit:fpr(<16 x s8>) = G_BIT %lhs, %mhs, %rhs
     $q0 = COPY %bit(<16 x s8>)
     RET_ReallyLR implicit $q0
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 4f13b78d6c169..47de57a68be96 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -67,9 +67,13 @@ define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b) {
 ;
 ; CHECK-GI-LABEL: bsl8xi8_const:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI6_0
-; CHECK-GI-NEXT:    ldr d2, [x8, :lo12:.LCPI6_0]
-; CHECK-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
+; CHECK-GI-NEXT:    adrp x8, .LCPI6_1
+; CHECK-GI-NEXT:    adrp x9, .LCPI6_0
+; CHECK-GI-NEXT:    ldr d2, [x8, :lo12:.LCPI6_1]
+; CHECK-GI-NEXT:    ldr d3, [x9, :lo12:.LCPI6_0]
+; CHECK-GI-NEXT:    and v0.8b, v0.8b, v2.8b
+; CHECK-GI-NEXT:    and v1.8b, v1.8b, v3.8b
+; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 %tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0 >
 %tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1 >
@@ -86,9 +90,13 @@ define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) {
 ;
 ; CHECK-GI-LABEL: bsl16xi8_const:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI7_0
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI7_0]
-; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    adrp x8, .LCPI7_1
+; CHECK-GI-NEXT:    adrp x9, .LCPI7_0
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI7_1]
+; CHECK-GI-NEXT:    ldr q3, [x9, :lo12:.LCPI7_0]
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT:    and v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 %tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0 >
 %tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1 >
@@ -918,9 +926,13 @@ define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) {
 ;
 ; CHECK-GI-LABEL: bsl2xi32_const:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI70_0
-; CHECK-GI-NEXT:    ldr d2, [x8, :lo12:.LCPI70_0]
-; CHECK-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
+; CHECK-GI-NEXT:    adrp x8, .LCPI70_1
+; CHECK-GI-NEXT:    adrp x9, .LCPI70_0
+; CHECK-GI-NEXT:    ldr d2, [x8, :lo12:.LCPI70_1]
+; CHECK-GI-NEXT:    ldr d3, [x9, :lo12:.LCPI70_0]
+; CHECK-GI-NEXT:    and v0.8b, v0.8b, v2.8b
+; CHECK-GI-NEXT:    and v1.8b, v1.8b, v3.8b
+; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 %tmp1 = and <2 x i32> %a, < i32 -1, i32 0 >
 %tmp2 = and <2 x i32> %b, < i32 0, i32 -1 >
@@ -938,9 +950,13 @@ define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) {
 ;
 ; CHECK-GI-LABEL: bsl4xi16_const:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI71_0
-; CHECK-GI-NEXT:    ldr d2, [x8, :lo12:.LCPI71_0]
-; CHECK-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
+; CHECK-GI-NEXT:    adrp x8, .LCPI71_1
+; CHECK-GI-NEXT:    adrp x9, .LCPI71_0
+; CHECK-GI-NEXT:    ldr d2, [x8, :lo12:.LCPI71_1]
+; CHECK-GI-NEXT:    ldr d3, [x9, :lo12:.LCPI71_0]
+; CHECK-GI-NEXT:    and v0.8b, v0.8b, v2.8b
+; CHECK-GI-NEXT:    and v1.8b, v1.8b, v3.8b
+; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 %tmp1 = and <4 x i16> %a, < i16 -1, i16 0, i16 -1,i16 0 >
 %tmp2 = and <4 x i16> %b, < i16 0, i16 -1,i16 0, i16 -1 >
@@ -979,9 +995,13 @@ define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b) {
 ;
 ; CHECK-GI-LABEL: bsl4xi32_const:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI73_0
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI73_0]
-; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    adrp x8, .LCPI73_1
+; CHECK-GI-NEXT:    adrp x9, .LCPI73_0
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI73_1]
+; CHECK-GI-NEXT:    ldr q3, [x9, :lo12:.LCPI73_0]
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT:    and v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 %tmp1 = and <4 x i32> %a, < i32 -1, i32 0, i32 -1, i32 0 >
 %tmp2 = and <4 x i32> %b, < i32 0, i32 -1, i32 0, i32 -1 >
@@ -998,9 +1018,13 @@ define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) {
 ;
 ; CHECK-GI-LABEL: bsl8xi16_const:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI74_0
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI74_0]
-; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    adrp x8, .LCPI74_1
+; CHECK-GI-NEXT:    adrp x9, .LCPI74_0
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI74_1]
+; CHECK-GI-NEXT:    ldr q3, [x9, :lo12:.LCPI74_0]
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT:    and v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 %tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 0,i16 0, i16 -1, i16 -1, i16 0,i16 0 >
 %tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 -1, i16 -1, i16 0, i16 0, i16 -1, i16 -1 >
@@ -1009,12 +1033,23 @@ define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) {
 }
 
 define <2 x i64> @bsl2xi64_const(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: bsl2xi64_const:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI75_0
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI75_0]
-; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: bsl2xi64_const:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    adrp x8, .LCPI75_0
+; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI75_0]
+; CHECK-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: bsl2xi64_const:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI75_1
+; CHECK-GI-NEXT:    adrp x9, .LCPI75_0
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI75_1]
+; CHECK-GI-NEXT:    ldr q3, [x9, :lo12:.LCPI75_0]
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT:    and v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
 %tmp1 = and <2 x i64> %a, < i64 -1, i64 0 >
 %tmp2 = and <2 x i64> %b, < i64 0, i64 -1 >
 %tmp3 = or <2 x i64> %tmp1, %tmp2
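For reference, the removed or_to_bsp combine matched exactly the shape these tests exercise: two ANDs whose constant masks are elementwise bitwise complements, feeding an OR, which amounts to a per-bit select between the two inputs. A standalone sketch of that pattern, assuming llc -mtriple=aarch64 -global-isel (the function name is illustrative); with the combine this selected to a single bif/bsl, and after this patch the CHECK-GI lines expect the literal and/and/orr sequence instead:

  define <8 x i8> @bsl_const_sketch(<8 x i8> %a, <8 x i8> %b) {
    ; The masks are complements, so (%a & m) | (%b & ~m) takes %a's bits
    ; where the mask is set and %b's bits elsewhere.
    %t1 = and <8 x i8> %a, <i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0>
    %t2 = and <8 x i8> %b, <i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1>
    %r = or <8 x i8> %t1, %t2
    ret <8 x i8> %r
  }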