// Patch summary (free text ahead of the first `diff --git` header).
//
// Purpose: stop promoting NEON integer-vector bitwise operations to a single
// "promoted bitwise" vector type, and instead select them directly through
// additional TableGen patterns.
//
// Files touched and what each hunk does:
//  * AArch64ISelLowering.{cpp,h}: addTypeForNEON loses its PromotedBitwiseVT
//    parameter; addDRTypeForNEON / addQRTypeForNEON no longer pass
//    MVT::v2i32 / MVT::v4i32. The visible hunks show no use of the parameter
//    inside the AArch64 function body.
//  * ARMISelLowering.{cpp,h}: addTypeForNEON loses the same parameter, and the
//    block that marked ISD::AND / ISD::OR / ISD::XOR as Promote (to
//    PromotedBitwiseVT) for integer vector types is deleted. The load/store
//    promotion type (PromotedLdStVT) is kept.
//  * ARMInstrNEON.td:
//      - new multiclass BitwisePatterns emitting Pat entries for v8i8, v4i16,
//        v1i64 (DPR, "d" instruction) and v16i8, v8i16, v2i64 (QPR, "q"
//        instruction); instantiated under HasNEON for VAND, VORR, VEOR, VBIC
//        and VORN;
//      - the VMVNd/VMVNq Pats are now written for v1i64/v4i16/v8i8 and
//        v2i64/v8i16/v16i8 in place of the former v2i32/v4i32 Pats;
//      - extra VBSPd Pats for v8i8/v4i16 and VBSPq Pats for v16i8/v8i16;
//      - the VABDLuv2i64 pattern matches the xor on v2i64 operands directly,
//        without the bitconvert to v4i32.
//  * test/CodeGen/ARM/vector-promotion.ll: simpleOneInstructionPromotion8x8
//    now expects the vector `or` followed by extractelement under IR-BOTH
//    (previously only under IR-STRESS).
//  * test/CodeGen/ARM/vmov.ll: the CHECK-BE sequence for any_extend uses
//    vrev64.16 instead of vrev64.32 and drops the vrev32.16.
//
// NOTE(review): this copy of the patch appears to have lost its line breaks
// and some angle-bracketed text (e.g. the parameter list of
// `multiclass BitwisePatterns`, the type argument of `!cast`, the template
// arguments of `std::pair`). Presumably an extraction artifact -- confirm
// against the upstream commit before applying.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 6ae073eaaab24b..6d5880acc3f822 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1388,7 +1388,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); } -void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) { +void AArch64TargetLowering::addTypeForNEON(MVT VT) { assert(VT.isVector() && "VT should be a vector type"); if (VT.isFloatingPoint()) { @@ -1589,12 +1589,12 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { void AArch64TargetLowering::addDRTypeForNEON(MVT VT) { addRegisterClass(VT, &AArch64::FPR64RegClass); - addTypeForNEON(VT, MVT::v2i32); + addTypeForNEON(VT); } void AArch64TargetLowering::addQRTypeForNEON(MVT VT) { addRegisterClass(VT, &AArch64::FPR128RegClass); - addTypeForNEON(VT, MVT::v4i32); + addTypeForNEON(VT); } EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index a0be2d52ef14bc..9c39859c038739 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -831,7 +831,7 @@ class AArch64TargetLowering : public TargetLowering { bool isExtFreeImpl(const Instruction *Ext) const override; - void addTypeForNEON(MVT VT, MVT PromotedBitwiseVT); + void addTypeForNEON(MVT VT); void addTypeForFixedLengthSVE(MVT VT); void addDRTypeForNEON(MVT VT); void addQRTypeForNEON(MVT VT); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 501175bf631bd4..fea43bbb4923bb 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -154,8 +154,7 @@ static const MCPhysReg GPRArgRegs[] = { ARM::R0, 
ARM::R1, ARM::R2, ARM::R3 }; -void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, - MVT PromotedBitwiseVT) { +void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) { if (VT != PromotedLdStVT) { setOperationAction(ISD::LOAD, VT, Promote); AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); @@ -194,16 +193,6 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, setOperationAction(ISD::SRL, VT, Custom); } - // Promote all bit-wise operations. - if (VT.isInteger() && VT != PromotedBitwiseVT) { - setOperationAction(ISD::AND, VT, Promote); - AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT); - setOperationAction(ISD::OR, VT, Promote); - AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT); - setOperationAction(ISD::XOR, VT, Promote); - AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT); - } - // Neon does not support vector divide/remainder operations. setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::UDIV, VT, Expand); @@ -225,12 +214,12 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, void ARMTargetLowering::addDRTypeForNEON(MVT VT) { addRegisterClass(VT, &ARM::DPRRegClass); - addTypeForNEON(VT, MVT::f64, MVT::v2i32); + addTypeForNEON(VT, MVT::f64); } void ARMTargetLowering::addQRTypeForNEON(MVT VT) { addRegisterClass(VT, &ARM::DPairRegClass); - addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); + addTypeForNEON(VT, MVT::v2f64); } void ARMTargetLowering::setAllExpand(MVT VT) { diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 98ea3b06614ae6..844b7d4f1707b4 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -756,7 +756,7 @@ class VectorType; bool HasStandaloneRem = true; - void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT); + void addTypeForNEON(MVT VT, MVT PromotedLdStVT); void addDRTypeForNEON(MVT VT); void addQRTypeForNEON(MVT VT); std::pair getARMXALUOOp(SDValue Op, SelectionDAG 
&DAG, SDValue &ARMcc) const; diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 5cafe85b72399b..3ca6704c17b9e7 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -5341,6 +5341,29 @@ def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", v4i32, v4i32, or, 1>; +multiclass BitwisePatterns { + def : Pat<(v8i8 (OpNodeD DPR:$LHS, DPR:$RHS)), + (!cast(Name#"d") DPR:$LHS, DPR:$RHS)>; + def : Pat<(v4i16 (OpNodeD DPR:$LHS, DPR:$RHS)), + (!cast(Name#"d") DPR:$LHS, DPR:$RHS)>; + def : Pat<(v1i64 (OpNodeD DPR:$LHS, DPR:$RHS)), + (!cast(Name#"d") DPR:$LHS, DPR:$RHS)>; + + def : Pat<(v16i8 (OpNodeQ QPR:$LHS, QPR:$RHS)), + (!cast(Name#"q") QPR:$LHS, QPR:$RHS)>; + def : Pat<(v8i16 (OpNodeQ QPR:$LHS, QPR:$RHS)), + (!cast(Name#"q") QPR:$LHS, QPR:$RHS)>; + def : Pat<(v2i64 (OpNodeQ QPR:$LHS, QPR:$RHS)), + (!cast(Name#"q") QPR:$LHS, QPR:$RHS)>; +} + +let Predicates = [HasNEON] in { + defm : BitwisePatterns<"VAND", and, and>; + defm : BitwisePatterns<"VORR", or, or>; + defm : BitwisePatterns<"VEOR", xor, xor>; +} + def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1, (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), IIC_VMOVImm, @@ -5392,6 +5415,11 @@ def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), (vnotq QPR:$Vm))))]>; } +let Predicates = [HasNEON] in { + defm : BitwisePatterns<"VBIC", BinOpFrag<(and node:$LHS, (vnotd node:$RHS))>, + BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>>; +} + def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1, (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), IIC_VMOVImm, @@ -5440,6 +5468,11 @@ def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd), [(set QPR:$Vd, (v4i32 (or QPR:$Vn, (vnotq QPR:$Vm))))]>; +let Predicates = [HasNEON] in { + defm : BitwisePatterns<"VORN", BinOpFrag<(or node:$LHS, (vnotd node:$RHS))>, + BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>>; +} + // VMVN : 
Vector Bitwise NOT (Immediate) let isReMaterializable = 1 in { @@ -5483,8 +5516,18 @@ def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, "vmvn", "$Vd, $Vm", "", [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>; let Predicates = [HasNEON] in { -def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>; -def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>; +def : Pat<(v1i64 (vnotd DPR:$src)), + (VMVNd DPR:$src)>; +def : Pat<(v4i16 (vnotd DPR:$src)), + (VMVNd DPR:$src)>; +def : Pat<(v8i8 (vnotd DPR:$src)), + (VMVNd DPR:$src)>; +def : Pat<(v2i64 (vnotq QPR:$src)), + (VMVNq QPR:$src)>; +def : Pat<(v8i16 (vnotq QPR:$src)), + (VMVNq QPR:$src)>; +def : Pat<(v16i8 (vnotq QPR:$src)), + (VMVNq QPR:$src)>; } // The TwoAddress pass will not go looking for equivalent operations @@ -5513,10 +5556,15 @@ def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v8i8 (or (and DPR:$Vn, DPR:$Vd), + (and DPR:$Vm, (vnotd DPR:$Vd)))), + (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v4i16 (or (and DPR:$Vn, DPR:$Vd), + (and DPR:$Vm, (vnotd DPR:$Vd)))), + (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), (and DPR:$Vm, (vnotd DPR:$Vd)))), (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; - def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), (and DPR:$Vm, (vnotd DPR:$Vd)))), (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; @@ -5544,6 +5592,12 @@ def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; +def : Pat<(v16i8 (or (and QPR:$Vn, QPR:$Vd), + (and QPR:$Vm, (vnotq QPR:$Vd)))), + (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; +def : Pat<(v8i16 (or (and QPR:$Vn, QPR:$Vd), + (and QPR:$Vm, (vnotq QPR:$Vd)))), + (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), (and QPR:$Vm, (vnotq QPR:$Vd)))), (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; @@ -5633,10 +5687,10 @@ def abd_shr : (zext node:$in2)), (i32 $shift))>; let Predicates = 
[HasNEON] in { -def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))), - (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)), - (zext (v2i32 DPR:$opB))), - (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))), +def : Pat<(xor (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)), + (v2i64 (add (sub (zext (v2i32 DPR:$opA)), + (zext (v2i32 DPR:$opB))), + (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))), (VABDLuv2i64 DPR:$opA, DPR:$opB)>; } diff --git a/llvm/test/CodeGen/ARM/vector-promotion.ll b/llvm/test/CodeGen/ARM/vector-promotion.ll index 9e2b35fe825848..014b61c69f88f2 100644 --- a/llvm/test/CodeGen/ARM/vector-promotion.ll +++ b/llvm/test/CodeGen/ARM/vector-promotion.ll @@ -356,18 +356,10 @@ define void @simpleOneInstructionPromotionVariableIdx(<2 x i32>* %addr1, i32* %d } ; Check a vector with more than 2 elements. -; This requires the STRESS mode because currently 'or v8i8' is not marked -; as legal or custom, althought the actual assembly is better if we were -; promoting it. 
; IR-BOTH-LABEL: @simpleOneInstructionPromotion8x8 ; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <8 x i8>, <8 x i8>* %addr1 -; Scalar version: -; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <8 x i8> [[LOAD]], i32 1 -; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = or i8 [[EXTRACT]], 1 -; Vector version: -; IR-STRESS-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or <8 x i8> [[LOAD]], -; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <8 x i8> [[OR]], i32 1 -; +; IR-BOTH-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or <8 x i8> [[LOAD]], +; IR-BOTH-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <8 x i8> [[OR]], i32 1 ; IR-BOTH-NEXT: store i8 [[RES]], i8* %dest ; IR-BOTH-NEXT: ret define void @simpleOneInstructionPromotion8x8(<8 x i8>* %addr1, i8* %dest) { diff --git a/llvm/test/CodeGen/ARM/vmov.ll b/llvm/test/CodeGen/ARM/vmov.ll index 694ffb1d0ecd15..9310bbc82faa2b 100644 --- a/llvm/test/CodeGen/ARM/vmov.ll +++ b/llvm/test/CodeGen/ARM/vmov.ll @@ -676,10 +676,9 @@ define arm_aapcs_vfpcc void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp ; CHECK-BE-LABEL: any_extend: ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vmov.i16 d16, #0x1 -; CHECK-BE-NEXT: vrev64.32 d17, d0 +; CHECK-BE-NEXT: vrev64.16 d17, d0 ; CHECK-BE-NEXT: vrev64.32 q9, q1 ; CHECK-BE-NEXT: vand d16, d17, d16 -; CHECK-BE-NEXT: vrev32.16 d16, d16 ; CHECK-BE-NEXT: vmovl.u16 q8, d16 ; CHECK-BE-NEXT: vsub.i32 q8, q8, q9 ; CHECK-BE-NEXT: vmovn.i32 d16, q8