diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 17d96370c04a5..bb32280fe51ff 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5403,6 +5403,53 @@ def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm), def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm), (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; +// The following SetCC patterns are used for GlobalISel only +multiclass SelectSetCC<PatFrags InFrag, string INST> { + def : Pat<(v8i8 (InFrag (v8i8 V64:$Rn), (v8i8 V64:$Rm))), + (v8i8 (!cast<Instruction>(INST # v8i8) (v8i8 V64:$Rn), (v8i8 V64:$Rm)))>; + def : Pat<(v16i8 (InFrag (v16i8 V128:$Rn), (v16i8 V128:$Rm))), + (v16i8 (!cast<Instruction>(INST # v16i8) (v16i8 V128:$Rn), (v16i8 V128:$Rm)))>; + def : Pat<(v4i16 (InFrag (v4i16 V64:$Rn), (v4i16 V64:$Rm))), + (v4i16 (!cast<Instruction>(INST # v4i16) (v4i16 V64:$Rn), (v4i16 V64:$Rm)))>; + def : Pat<(v8i16 (InFrag (v8i16 V128:$Rn), (v8i16 V128:$Rm))), + (v8i16 (!cast<Instruction>(INST # v8i16) (v8i16 V128:$Rn), (v8i16 V128:$Rm)))>; + def : Pat<(v2i32 (InFrag (v2i32 V64:$Rn), (v2i32 V64:$Rm))), + (v2i32 (!cast<Instruction>(INST # v2i32) (v2i32 V64:$Rn), (v2i32 V64:$Rm)))>; + def : Pat<(v4i32 (InFrag (v4i32 V128:$Rn), (v4i32 V128:$Rm))), + (v4i32 (!cast<Instruction>(INST # v4i32) (v4i32 V128:$Rn), (v4i32 V128:$Rm)))>; + def : Pat<(v2i64 (InFrag (v2i64 V128:$Rn), (v2i64 V128:$Rm))), + (v2i64 (!cast<Instruction>(INST # v2i64) (v2i64 V128:$Rn), (v2i64 V128:$Rm)))>; +} + +defm : SelectSetCC<seteq, "CMEQ">; +defm : SelectSetCC<setgt, "CMGT">; +defm : SelectSetCC<setge, "CMGE">; +defm : SelectSetCC<setugt, "CMHI">; +defm : SelectSetCC<setuge, "CMHS">; + +// lt/le/ult/ule are selected as gt/ge/ugt/uge with the operands swapped. +multiclass SelectSetCCSwapOperands<PatFrags InFrag, string INST> { + def : Pat<(v8i8 (InFrag (v8i8 V64:$Rn), (v8i8 V64:$Rm))), + (v8i8 (!cast<Instruction>(INST # v8i8) (v8i8 V64:$Rm), (v8i8 V64:$Rn)))>; + def : Pat<(v16i8 (InFrag (v16i8 V128:$Rn), (v16i8 V128:$Rm))), + (v16i8 (!cast<Instruction>(INST # v16i8) (v16i8 V128:$Rm), (v16i8 V128:$Rn)))>; + def : Pat<(v4i16 (InFrag (v4i16 V64:$Rn), (v4i16 V64:$Rm))), + (v4i16 (!cast<Instruction>(INST # v4i16) (v4i16 V64:$Rm), (v4i16 V64:$Rn)))>; + def : Pat<(v8i16
(InFrag (v8i16 V128:$Rn), (v8i16 V128:$Rm))), + (v8i16 (!cast<Instruction>(INST # v8i16) (v8i16 V128:$Rm), (v8i16 V128:$Rn)))>; + def : Pat<(v2i32 (InFrag (v2i32 V64:$Rn), (v2i32 V64:$Rm))), + (v2i32 (!cast<Instruction>(INST # v2i32) (v2i32 V64:$Rm), (v2i32 V64:$Rn)))>; + def : Pat<(v4i32 (InFrag (v4i32 V128:$Rn), (v4i32 V128:$Rm))), + (v4i32 (!cast<Instruction>(INST # v4i32) (v4i32 V128:$Rm), (v4i32 V128:$Rn)))>; + def : Pat<(v2i64 (InFrag (v2i64 V128:$Rn), (v2i64 V128:$Rm))), + (v2i64 (!cast<Instruction>(INST # v2i64) (v2i64 V128:$Rm), (v2i64 V128:$Rn)))>; +} + +defm : SelectSetCCSwapOperands<setlt, "CMGT">; +defm : SelectSetCCSwapOperands<setle, "CMGE">; +defm : SelectSetCCSwapOperands<setult, "CMHI">; +defm : SelectSetCCSwapOperands<setule, "CMHS">; + let Predicates = [HasNEON] in { def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}", (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 61f5bc2464ee5..1b65ae7b47826 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -221,7 +221,6 @@ class AArch64InstructionSelector : public InstructionSelector { bool selectIntrinsicWithSideEffects(MachineInstr &I, MachineRegisterInfo &MRI); bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI); - bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI); bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI); bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI); bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI); @@ -3403,7 +3402,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { } case TargetOpcode::G_ICMP: { if (Ty.isVector()) - return selectVectorICmp(I, MRI); + return false; if (Ty != LLT::scalar(32)) { LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty @@ -3652,177 +3651,6 @@ bool AArch64InstructionSelector::selectTLSGlobalValue( return true; } -bool
AArch64InstructionSelector::selectVectorICmp( - MachineInstr &I, MachineRegisterInfo &MRI) { - Register DstReg = I.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - Register SrcReg = I.getOperand(2).getReg(); - Register Src2Reg = I.getOperand(3).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - - unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits(); - unsigned NumElts = DstTy.getNumElements(); - - // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b - // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16 - // Third index is cc opcode: - // 0 == eq - // 1 == ugt - // 2 == uge - // 3 == ult - // 4 == ule - // 5 == sgt - // 6 == sge - // 7 == slt - // 8 == sle - // ne is done by negating 'eq' result. - - // This table below assumes that for some comparisons the operands will be - // commuted. - // ult op == commute + ugt op - // ule op == commute + uge op - // slt op == commute + sgt op - // sle op == commute + sge op - unsigned PredIdx = 0; - bool SwapOperands = false; - CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate(); - switch (Pred) { - case CmpInst::ICMP_NE: - case CmpInst::ICMP_EQ: - PredIdx = 0; - break; - case CmpInst::ICMP_UGT: - PredIdx = 1; - break; - case CmpInst::ICMP_UGE: - PredIdx = 2; - break; - case CmpInst::ICMP_ULT: - PredIdx = 3; - SwapOperands = true; - break; - case CmpInst::ICMP_ULE: - PredIdx = 4; - SwapOperands = true; - break; - case CmpInst::ICMP_SGT: - PredIdx = 5; - break; - case CmpInst::ICMP_SGE: - PredIdx = 6; - break; - case CmpInst::ICMP_SLT: - PredIdx = 7; - SwapOperands = true; - break; - case CmpInst::ICMP_SLE: - PredIdx = 8; - SwapOperands = true; - break; - default: - llvm_unreachable("Unhandled icmp predicate"); - return false; - } - - // This table obviously should be tablegen'd when we have our GISel native - // tablegen selector. 
- - static const unsigned OpcTable[4][4][9] = { - { - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8, - AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8, - AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8}, - {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8, - AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8, - AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8} - }, - { - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16, - AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16, - AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16}, - {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16, - AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16, - AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */} - }, - { - {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32, - AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32, - AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32}, - {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32, - AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32, - AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {0 /* 
invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */} - }, - { - {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64, - AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64, - AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */} - }, - }; - unsigned EltIdx = Log2_32(SrcEltSize / 8); - unsigned NumEltsIdx = Log2_32(NumElts / 2); - unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx]; - if (!Opc) { - LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode"); - return false; - } - - const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI); - const TargetRegisterClass *SrcRC = - getRegClassForTypeOnBank(SrcTy, VecRB, true); - if (!SrcRC) { - LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); - return false; - } - - unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0; - if (SrcTy.getSizeInBits() == 128) - NotOpc = NotOpc ? AArch64::NOTv16i8 : 0; - - if (SwapOperands) - std::swap(SrcReg, Src2Reg); - - auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg}); - constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); - - // Invert if we had a 'ne' cc. 
- if (NotOpc) { - Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp}); - constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); - } else { - MIB.buildCopy(DstReg, Cmp.getReg(0)); - } - RBI.constrainGenericRegister(DstReg, *SrcRC, MRI); - I.eraseFromParent(); - return true; -} - MachineInstr *AArch64InstructionSelector::emitScalarToVector( unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar, MachineIRBuilder &MIRBuilder) const { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index d4aac94d24f12..b8274f0f872c6 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -495,17 +495,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) // FIXME: fix moreElementsToNextPow2 getActionDefinitionsBuilder(G_ICMP) - .legalFor({{s32, s32}, - {s32, s64}, - {s32, p0}, - {v4s32, v4s32}, - {v2s32, v2s32}, - {v2s64, v2s64}, - {v2s64, v2p0}, - {v4s16, v4s16}, - {v8s16, v8s16}, - {v8s8, v8s8}, - {v16s8, v16s8}}) + .legalFor({{s32, s32}, {s32, s64}, {s32, p0}}) .widenScalarOrEltToNextPow2(1) .clampScalar(1, s32, s64) .clampScalar(0, s32, s32) @@ -527,7 +517,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampNumElements(1, v8s8, v16s8) .clampNumElements(1, v4s16, v8s16) .clampNumElements(1, v2s32, v4s32) - .clampNumElements(1, v2s64, v2s64); + .clampNumElements(1, v2s64, v2s64) + .customIf(isVector(0)); getActionDefinitionsBuilder(G_FCMP) .legalFor({{s32, MinFPScalar}, @@ -1266,6 +1257,8 @@ bool AArch64LegalizerInfo::legalizeCustom( return legalizePrefetch(MI, Helper); case TargetOpcode::G_ABS: return Helper.lowerAbsToCNeg(MI); + case TargetOpcode::G_ICMP: + return legalizeICMP(MI, MRI, MIRBuilder); } llvm_unreachable("expected switch to return"); @@ -1324,6 +1317,40 @@ bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI, return true; } +/// Custom legalization for vector G_ICMP. +/// +/// Returns false unless the destination and source vectors have the same +/// element width and element count and the destination is 64 or 128 bits +/// wide. For accepted types, every predicate other than NE is left as is; +/// NE is rewritten as NOT(G_ICMP EQ) and \p MI is erased. +bool
AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg1 = MI.getOperand(2).getReg(); + Register SrcReg2 = MI.getOperand(3).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg1); + + // Check the vector types are legal + if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() || + DstTy.getNumElements() != SrcTy.getNumElements() || + (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128)) + return false; + + // Lower G_ICMP NE to NOT(G_ICMP EQ) to allow better pattern matching in + // following passes; every other predicate is kept as is. + auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + if (Pred != CmpInst::ICMP_NE) + return true; + Register CmpReg = + MIRBuilder.buildICmp(CmpInst::ICMP_EQ, DstTy, SrcReg1, SrcReg2).getReg(0); + MIRBuilder.buildNot(DstReg, CmpReg); + + MI.eraseFromParent(); + return true; +} + bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h index b69d9b015bd2b..00d85a36e4b2c 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -50,6 +50,8 @@ class AArch64LegalizerInfo : public LegalizerInfo { LegalizerHelper &Helper) const; bool legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const; + bool legalizeICMP(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const; bool legalizeFunnelShift(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer, diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir index 4151f7ecb3eac..df4e7ddaac8b9 100644 ---
a/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir @@ -361,8 +361,8 @@ body: | ; CHECK-NEXT: %cmp_lhs:fpr128 = COPY $q0 ; CHECK-NEXT: %cmp_rhs:fpr128 = COPY $q1 ; CHECK-NEXT: %add_lhs:fpr128 = COPY $q2 - ; CHECK-NEXT: [[CMEQv4i32_:%[0-9]+]]:fpr128 = CMEQv4i32 %cmp_lhs, %cmp_rhs - ; CHECK-NEXT: %add:fpr128 = ADDv4i32 %add_lhs, [[CMEQv4i32_]] + ; CHECK-NEXT: %cmp:fpr128 = CMEQv4i32 %cmp_lhs, %cmp_rhs + ; CHECK-NEXT: %add:fpr128 = ADDv4i32 %add_lhs, %cmp ; CHECK-NEXT: $q0 = COPY %add ; CHECK-NEXT: RET_ReallyLR implicit $q0 %cmp_lhs:fpr(<4 x s32>) = COPY $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-icmp.mir index 21e84ecaed32f..7884d9e1b1d72 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-icmp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-icmp.mir @@ -46,46 +46,6 @@ ret <8 x i1> %cmp } - define <2 x i1> @test_v2i64_ne(<2 x i64> %v1, <2 x i64> %v2) { - %cmp = icmp ne <2 x i64> %v1, %v2 - ret <2 x i1> %cmp - } - - define <4 x i1> @test_v4i32_ne(<4 x i32> %v1, <4 x i32> %v2) { - %cmp = icmp ne <4 x i32> %v1, %v2 - ret <4 x i1> %cmp - } - - define <2 x i1> @test_v2i32_ne(<2 x i32> %v1, <2 x i32> %v2) { - %cmp = icmp ne <2 x i32> %v1, %v2 - ret <2 x i1> %cmp - } - - define <2 x i1> @test_v2i16_ne(<2 x i16> %v1, <2 x i16> %v2) { - %cmp = icmp ne <2 x i16> %v1, %v2 - ret <2 x i1> %cmp - } - - define <8 x i1> @test_v8i16_ne(<8 x i16> %v1, <8 x i16> %v2) { - %cmp = icmp ne <8 x i16> %v1, %v2 - ret <8 x i1> %cmp - } - - define <4 x i1> @test_v4i16_ne(<4 x i16> %v1, <4 x i16> %v2) { - %cmp = icmp ne <4 x i16> %v1, %v2 - ret <4 x i1> %cmp - } - - define <16 x i1> @test_v16i8_ne(<16 x i8> %v1, <16 x i8> %v2) { - %cmp = icmp ne <16 x i8> %v1, %v2 - ret <16 x i1> %cmp - } - - define <8 x i1> @test_v8i8_ne(<8 x i8> %v1, <8 x i8> %v2) { - %cmp = icmp ne <8 x i8> %v1, %v2 - ret <8 x i1> %cmp - } - define <2 x i1> 
@test_v2i64_ugt(<2 x i64> %v1, <2 x i64> %v2) { %cmp = icmp ugt <2 x i64> %v1, %v2 ret <2 x i1> %cmp @@ -696,304 +656,6 @@ body: | $d0 = COPY %3(<8 x s8>) RET_ReallyLR implicit $d0 -... ---- -name: test_v2i64_ne -alignment: 4 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: fpr } - - { id: 1, class: fpr } - - { id: 2, class: _ } - - { id: 3, class: fpr } - - { id: 4, class: fpr } -machineFunctionInfo: {} -body: | - bb.1 (%ir-block.0): - liveins: $q0, $q1 - - ; CHECK-LABEL: name: test_v2i64_ne - ; CHECK: liveins: $q0, $q1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 - ; CHECK-NEXT: [[CMEQv2i64_:%[0-9]+]]:fpr128 = CMEQv2i64 [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[NOTv16i8_:%[0-9]+]]:fpr128 = NOTv16i8 [[CMEQv2i64_]] - ; CHECK-NEXT: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[NOTv16i8_]] - ; CHECK-NEXT: $d0 = COPY [[XTNv2i32_]] - ; CHECK-NEXT: RET_ReallyLR implicit $d0 - %0:fpr(<2 x s64>) = COPY $q0 - %1:fpr(<2 x s64>) = COPY $q1 - %4:fpr(<2 x s64>) = G_ICMP intpred(ne), %0(<2 x s64>), %1 - %3:fpr(<2 x s32>) = G_TRUNC %4(<2 x s64>) - $d0 = COPY %3(<2 x s32>) - RET_ReallyLR implicit $d0 - -... 
---- -name: test_v4i32_ne -alignment: 4 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: fpr } - - { id: 1, class: fpr } - - { id: 2, class: _ } - - { id: 3, class: fpr } - - { id: 4, class: fpr } -machineFunctionInfo: {} -body: | - bb.1 (%ir-block.0): - liveins: $q0, $q1 - - ; CHECK-LABEL: name: test_v4i32_ne - ; CHECK: liveins: $q0, $q1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 - ; CHECK-NEXT: [[CMEQv4i32_:%[0-9]+]]:fpr128 = CMEQv4i32 [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[NOTv16i8_:%[0-9]+]]:fpr128 = NOTv16i8 [[CMEQv4i32_]] - ; CHECK-NEXT: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[NOTv16i8_]] - ; CHECK-NEXT: $d0 = COPY [[XTNv4i16_]] - ; CHECK-NEXT: RET_ReallyLR implicit $d0 - %0:fpr(<4 x s32>) = COPY $q0 - %1:fpr(<4 x s32>) = COPY $q1 - %4:fpr(<4 x s32>) = G_ICMP intpred(ne), %0(<4 x s32>), %1 - %3:fpr(<4 x s16>) = G_TRUNC %4(<4 x s32>) - $d0 = COPY %3(<4 x s16>) - RET_ReallyLR implicit $d0 - -... 
---- -name: test_v2i32_ne -alignment: 4 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: fpr } - - { id: 1, class: fpr } - - { id: 2, class: _ } - - { id: 3, class: fpr } - - { id: 4, class: fpr } -machineFunctionInfo: {} -body: | - bb.1 (%ir-block.0): - liveins: $d0, $d1 - - ; CHECK-LABEL: name: test_v2i32_ne - ; CHECK: liveins: $d0, $d1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 - ; CHECK-NEXT: [[CMEQv2i32_:%[0-9]+]]:fpr64 = CMEQv2i32 [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[NOTv8i8_:%[0-9]+]]:fpr64 = NOTv8i8 [[CMEQv2i32_]] - ; CHECK-NEXT: $d0 = COPY [[NOTv8i8_]] - ; CHECK-NEXT: RET_ReallyLR implicit $d0 - %0:fpr(<2 x s32>) = COPY $d0 - %1:fpr(<2 x s32>) = COPY $d1 - %4:fpr(<2 x s32>) = G_ICMP intpred(ne), %0(<2 x s32>), %1 - %3:fpr(<2 x s32>) = COPY %4(<2 x s32>) - $d0 = COPY %3(<2 x s32>) - RET_ReallyLR implicit $d0 - -... ---- -name: test_v2i16_ne -alignment: 4 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: fpr } - - { id: 3, class: fpr } - - { id: 4, class: _ } - - { id: 5, class: fpr } - - { id: 6, class: _ } - - { id: 7, class: fpr } - - { id: 8, class: fpr } - - { id: 9, class: fpr } - - { id: 10, class: gpr } - - { id: 11, class: fpr } - - { id: 12, class: fpr } - - { id: 13, class: gpr } - - { id: 14, class: fpr } - - { id: 15, class: fpr } -machineFunctionInfo: {} -body: | - bb.1 (%ir-block.0): - liveins: $d0, $d1 - - ; CHECK-LABEL: name: test_v2i16_ne - ; CHECK: liveins: $d0, $d1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 - ; CHECK-NEXT: [[MOVID:%[0-9]+]]:fpr64 = MOVID 51 - ; CHECK-NEXT: [[ANDv8i8_:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY]], [[MOVID]] - ; CHECK-NEXT: [[MOVID1:%[0-9]+]]:fpr64 = MOVID 51 - ; CHECK-NEXT: [[ANDv8i8_1:%[0-9]+]]:fpr64 = ANDv8i8 
[[COPY1]], [[MOVID1]] - ; CHECK-NEXT: [[CMEQv2i32_:%[0-9]+]]:fpr64 = CMEQv2i32 [[ANDv8i8_]], [[ANDv8i8_1]] - ; CHECK-NEXT: [[NOTv8i8_:%[0-9]+]]:fpr64 = NOTv8i8 [[CMEQv2i32_]] - ; CHECK-NEXT: $d0 = COPY [[NOTv8i8_]] - ; CHECK-NEXT: RET_ReallyLR implicit $d0 - %2:fpr(<2 x s32>) = COPY $d0 - %3:fpr(<2 x s32>) = COPY $d1 - %13:gpr(s32) = G_CONSTANT i32 65535 - %14:fpr(<2 x s32>) = G_BUILD_VECTOR %13(s32), %13(s32) - %15:fpr(<2 x s32>) = COPY %2(<2 x s32>) - %7:fpr(<2 x s32>) = G_AND %15, %14 - %10:gpr(s32) = G_CONSTANT i32 65535 - %11:fpr(<2 x s32>) = G_BUILD_VECTOR %10(s32), %10(s32) - %12:fpr(<2 x s32>) = COPY %3(<2 x s32>) - %8:fpr(<2 x s32>) = G_AND %12, %11 - %9:fpr(<2 x s32>) = G_ICMP intpred(ne), %7(<2 x s32>), %8 - %5:fpr(<2 x s32>) = COPY %9(<2 x s32>) - $d0 = COPY %5(<2 x s32>) - RET_ReallyLR implicit $d0 - -... ---- -name: test_v8i16_ne -alignment: 4 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: fpr } - - { id: 1, class: fpr } - - { id: 2, class: _ } - - { id: 3, class: fpr } - - { id: 4, class: fpr } -machineFunctionInfo: {} -body: | - bb.1 (%ir-block.0): - liveins: $q0, $q1 - - ; CHECK-LABEL: name: test_v8i16_ne - ; CHECK: liveins: $q0, $q1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 - ; CHECK-NEXT: [[CMEQv8i16_:%[0-9]+]]:fpr128 = CMEQv8i16 [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[NOTv16i8_:%[0-9]+]]:fpr128 = NOTv16i8 [[CMEQv8i16_]] - ; CHECK-NEXT: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[NOTv16i8_]] - ; CHECK-NEXT: $d0 = COPY [[XTNv8i8_]] - ; CHECK-NEXT: RET_ReallyLR implicit $d0 - %0:fpr(<8 x s16>) = COPY $q0 - %1:fpr(<8 x s16>) = COPY $q1 - %4:fpr(<8 x s16>) = G_ICMP intpred(ne), %0(<8 x s16>), %1 - %3:fpr(<8 x s8>) = G_TRUNC %4(<8 x s16>) - $d0 = COPY %3(<8 x s8>) - RET_ReallyLR implicit $d0 - -... 
---- -name: test_v4i16_ne -alignment: 4 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: fpr } - - { id: 1, class: fpr } - - { id: 2, class: _ } - - { id: 3, class: fpr } - - { id: 4, class: fpr } -machineFunctionInfo: {} -body: | - bb.1 (%ir-block.0): - liveins: $d0, $d1 - - ; CHECK-LABEL: name: test_v4i16_ne - ; CHECK: liveins: $d0, $d1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 - ; CHECK-NEXT: [[CMEQv4i16_:%[0-9]+]]:fpr64 = CMEQv4i16 [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[NOTv8i8_:%[0-9]+]]:fpr64 = NOTv8i8 [[CMEQv4i16_]] - ; CHECK-NEXT: $d0 = COPY [[NOTv8i8_]] - ; CHECK-NEXT: RET_ReallyLR implicit $d0 - %0:fpr(<4 x s16>) = COPY $d0 - %1:fpr(<4 x s16>) = COPY $d1 - %4:fpr(<4 x s16>) = G_ICMP intpred(ne), %0(<4 x s16>), %1 - %3:fpr(<4 x s16>) = COPY %4(<4 x s16>) - $d0 = COPY %3(<4 x s16>) - RET_ReallyLR implicit $d0 - -... ---- -name: test_v16i8_ne -alignment: 4 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: fpr } - - { id: 1, class: fpr } - - { id: 2, class: _ } - - { id: 3, class: fpr } - - { id: 4, class: fpr } -machineFunctionInfo: {} -body: | - bb.1 (%ir-block.0): - liveins: $q0, $q1 - - ; CHECK-LABEL: name: test_v16i8_ne - ; CHECK: liveins: $q0, $q1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 - ; CHECK-NEXT: [[CMEQv16i8_:%[0-9]+]]:fpr128 = CMEQv16i8 [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[NOTv16i8_:%[0-9]+]]:fpr128 = NOTv16i8 [[CMEQv16i8_]] - ; CHECK-NEXT: $q0 = COPY [[NOTv16i8_]] - ; CHECK-NEXT: RET_ReallyLR implicit $q0 - %0:fpr(<16 x s8>) = COPY $q0 - %1:fpr(<16 x s8>) = COPY $q1 - %4:fpr(<16 x s8>) = G_ICMP intpred(ne), %0(<16 x s8>), %1 - %3:fpr(<16 x s8>) = COPY %4(<16 x s8>) - $q0 = COPY %3(<16 x s8>) - RET_ReallyLR implicit $q0 - -... 
---- -name: test_v8i8_ne -alignment: 4 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: fpr } - - { id: 1, class: fpr } - - { id: 2, class: _ } - - { id: 3, class: fpr } - - { id: 4, class: fpr } -machineFunctionInfo: {} -body: | - bb.1 (%ir-block.0): - liveins: $d0, $d1 - - ; CHECK-LABEL: name: test_v8i8_ne - ; CHECK: liveins: $d0, $d1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 - ; CHECK-NEXT: [[CMEQv8i8_:%[0-9]+]]:fpr64 = CMEQv8i8 [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[NOTv8i8_:%[0-9]+]]:fpr64 = NOTv8i8 [[CMEQv8i8_]] - ; CHECK-NEXT: $d0 = COPY [[NOTv8i8_]] - ; CHECK-NEXT: RET_ReallyLR implicit $d0 - %0:fpr(<8 x s8>) = COPY $d0 - %1:fpr(<8 x s8>) = COPY $d1 - %4:fpr(<8 x s8>) = G_ICMP intpred(ne), %0(<8 x s8>), %1 - %3:fpr(<8 x s8>) = COPY %4(<8 x s8>) - $d0 = COPY %3(<8 x s8>) - RET_ReallyLR implicit $d0 - ... --- name: test_v2i64_ugt