diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 0e94b78d11d83..a5b09b2e46b36 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -897,6 +897,11 @@ def FeatureUseWzrToVecMove : SubtargetFeature<"use-wzr-to-vec-move",
                                               "UseWzrToVecMove", "true",
                                               "Move from WZR to insert 0 into vector registers">;
+// On some processors, 2-operand XORs use higher-throughput paths than 3-operand XORs,
+// making it more beneficial to keep them separate.
+def FeatureDisableXorMerge : SubtargetFeature<"disable-xor-merge",
+                                              "DisableXorMerge", "true",
+                                              "Prevent chained XOR instructions from being merged into EOR3">;
 
 //===----------------------------------------------------------------------===//
 // Architectures.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index b9e299ef37454..00781ed4204dd 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -436,6 +436,7 @@
 def AllowMisalignedMemAccesses : Predicate<"!Subtarget->requiresStrictAlign()">;
 
 def UseWzrToVecMove : Predicate<"Subtarget->useWzrToVecMove()">;
+def AllowXorToEor3Merge : Predicate<"!Subtarget->disableXorMerge()">;
 
 
 //===----------------------------------------------------------------------===//
@@ -1805,14 +1806,24 @@
 def : SHA3_pattern;
 def : SHA3_pattern;
 def : SHA3_pattern;
 
-class EOR3_pattern<ValueType VecTy>
-  : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
-        (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
+multiclass EOR3_pattern<ValueType Vec128Ty, ValueType Vec64Ty> {
+  def : Pat<(xor (xor (Vec128Ty V128:$Vn), (Vec128Ty V128:$Vm)), (Vec128Ty V128:$Va)),
+            (EOR3 (Vec128Ty V128:$Vn), (Vec128Ty V128:$Vm), (Vec128Ty V128:$Va))>;
+  def : Pat<(xor (xor (Vec64Ty V64:$Vn), (Vec64Ty V64:$Vm)), (Vec64Ty V64:$Va)),
+            (EXTRACT_SUBREG
+              (EOR3
+                (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vn, dsub),
+                (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vm, dsub),
+                (INSERT_SUBREG (IMPLICIT_DEF), V64:$Va, dsub)),
+              dsub)>;
+}
 
-def : EOR3_pattern<v16i8>;
-def : EOR3_pattern<v8i16>;
-def : EOR3_pattern<v4i32>;
-def : EOR3_pattern<v2i64>;
+let Predicates = [AllowXorToEor3Merge], AddedComplexity = 1 in {
+defm : EOR3_pattern<v16i8, v8i8>;
+defm : EOR3_pattern<v8i16, v4i16>;
+defm : EOR3_pattern<v4i32, v2i32>;
+defm : EOR3_pattern<v2i64, v1i64>;
+}
 
 class BCAX_pattern<ValueType VecTy>
   : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))),
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index 81f5d075729d9..1b0eb687c3be8 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -1081,7 +1081,8 @@ def ProcessorFeatures {
                              FeatureCCIDX, FeatureDotProd, FeatureFullFP16, FeatureSB,
                              FeatureSSBS, FeatureSVE, FeatureComplxNum, FeatureCRC,
                              FeatureFPARMv8, FeatureJS, FeatureLSE,
-                             FeatureNEON, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureFPAC];
+                             FeatureNEON, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureFPAC,
+                             FeatureDisableXorMerge];
   list<string> NeoverseN3 = [HasV9_2aOps, FeatureETE, FeatureFP16FML, FeatureFullFP16,
                              FeatureMTE, FeaturePerfMon, FeatureRandGen, FeatureSPE, FeatureSPE_EEF,
@@ -1109,7 +1110,7 @@ def ProcessorFeatures {
                              FeatureCCIDX, FeatureSHA3, FeatureSM4, FeatureDotProd,
                              FeatureComplxNum, FeatureCRC, FeatureJS, FeatureLSE,
                              FeaturePAuth, FeatureRAS,
-                             FeatureRCPC, FeatureRDM];
+                             FeatureRCPC, FeatureRDM, FeatureDisableXorMerge];
   list<string> NeoverseV2 = [HasV9_0aOps, FeatureBF16, FeatureSPE, FeaturePerfMon, FeatureETE,
                             FeatureMatMulInt8, FeatureNEON, FeatureSVEBitPerm, FeatureFP16FML,
@@ -1117,7 +1118,8 @@ def ProcessorFeatures {
                             FeatureCCIDX, FeatureSVE, FeatureSVE2, FeatureSSBS,
                             FeatureFullFP16, FeatureDotProd, FeatureComplxNum, FeatureCRC,
                             FeatureFPARMv8, FeatureJS, FeatureLSE,
-                            FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureFPAC];
+                            FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureFPAC,
+                            FeatureDisableXorMerge];
   list<string> NeoverseV3 = [HasV9_2aOps, FeatureETE, FeatureFP16FML, FeatureFullFP16,
                              FeatureLS64, FeatureMTE, FeaturePerfMon, FeatureRandGen, FeatureSPE,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3b268dcbca600..117fd30f60f49 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -517,7 +517,9 @@ def AArch64msb_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
                              (vselect node:$pred, (sub node:$op3, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op1)), node:$op1)]>;
 def AArch64eor3 : PatFrags<(ops node:$op1, node:$op2, node:$op3),
                            [(int_aarch64_sve_eor3 node:$op1, node:$op2, node:$op3),
-                            (xor node:$op1, (xor node:$op2, node:$op3))]>;
+                            (xor node:$op1, (xor node:$op2, node:$op3))], [{
+  return N->getOpcode() != ISD::XOR || !Subtarget->disableXorMerge();
+  }]>;
 def AArch64bcax : PatFrags<(ops node:$op1, node:$op2, node:$op3),
                            [(int_aarch64_sve_bcax node:$op1, node:$op2, node:$op3),
                             (xor node:$op1, (and node:$op2, (vnot node:$op3)))]>;
diff --git a/llvm/test/CodeGen/AArch64/eor3-merge.ll b/llvm/test/CodeGen/AArch64/eor3-merge.ll
new file mode 100644
index 0000000000000..a91bd56d14d2d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/eor3-merge.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
+; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 -mcpu=neoverse-v1 < %s | FileCheck --check-prefix=CHECK-NOMERGE %s
+; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 -mcpu=neoverse-v2 < %s | FileCheck --check-prefix=CHECK-NOMERGE %s
+; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 -mcpu=neoverse-v3 < %s | FileCheck --check-prefix=CHECK-MERGE %s
+; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 -mcpu=neoverse-n2 < %s | FileCheck --check-prefix=CHECK-NOMERGE %s
+; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 -mcpu=neoverse-n3 < %s | FileCheck --check-prefix=CHECK-MERGE %s
+
+define <8 x i8> @eor3_8x8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) {
+; CHECK-NOMERGE-LABEL: eor3_8x8:
+; CHECK-NOMERGE:       // %bb.0:
+; CHECK-NOMERGE-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NOMERGE-NEXT:    eor v0.8b, v1.8b, v0.8b
+; CHECK-NOMERGE-NEXT:    ret
+;
+; CHECK-MERGE-LABEL: eor3_8x8:
+; CHECK-MERGE:       // %bb.0:
+; CHECK-MERGE-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-MERGE-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-MERGE-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-MERGE-NEXT:    eor3 v0.16b, v1.16b, v2.16b, v0.16b
+; CHECK-MERGE-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-MERGE-NEXT:    ret
+  %4 = xor <8 x i8> %1, %2
+  %5 = xor <8 x i8> %4, %0
+  ret <8 x i8> %5
+}
diff --git a/llvm/test/CodeGen/AArch64/eor3.ll b/llvm/test/CodeGen/AArch64/eor3.ll
index eccd09131b525..594a73f70a7f9 100644
--- a/llvm/test/CodeGen/AArch64/eor3.ll
+++ b/llvm/test/CodeGen/AArch64/eor3.ll
@@ -277,3 +277,154 @@ define <2 x i64> @eor3_vnot(<2 x i64> %0, <2 x i64> %1) {
   ret <2 x i64> %4
 }
 
+define <1 x i64> @eor3_1x64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
+; SHA3-LABEL: eor3_1x64:
+; SHA3:       // %bb.0:
+; SHA3-NEXT:    // kill: def $d0 killed $d0 def $q0
+; SHA3-NEXT:    // kill: def $d2 killed $d2 def $q2
+; SHA3-NEXT:    // kill: def $d1 killed $d1 def $q1
+; SHA3-NEXT:    eor3 v0.16b, v1.16b, v2.16b, v0.16b
+; SHA3-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; SHA3-NEXT:    ret
+;
+; NOSHA3-LABEL: eor3_1x64:
+; NOSHA3:       // %bb.0:
+; NOSHA3-NEXT:    eor v1.8b, v1.8b, v2.8b
+; NOSHA3-NEXT:    eor v0.8b, v1.8b, v0.8b
+; NOSHA3-NEXT:    ret
+;
+; SVE2-LABEL: eor3_1x64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    eor3 z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT:    fmov d0, d1
+; SVE2-NEXT:    ret
+;
+; SHA3-SVE2-LABEL: eor3_1x64:
+; SHA3-SVE2:       // %bb.0:
+; SHA3-SVE2-NEXT:    // kill: def $d0 killed $d0 def $q0
+; SHA3-SVE2-NEXT:    // kill: def $d2 killed $d2 def $q2
+; SHA3-SVE2-NEXT:    // kill: def $d1 killed $d1 def $q1
+; SHA3-SVE2-NEXT:    eor3 v0.16b, v1.16b, v2.16b, v0.16b
+; SHA3-SVE2-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; SHA3-SVE2-NEXT:    ret
+  %4 = xor <1 x i64> %1, %2
+  %5 = xor <1 x i64> %4, %0
+  ret <1 x i64> %5
+}
+
+define <2 x i32> @eor3_2x32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) {
+; SHA3-LABEL: eor3_2x32:
+; SHA3:       // %bb.0:
+; SHA3-NEXT:    // kill: def $d0 killed $d0 def $q0
+; SHA3-NEXT:    // kill: def $d2 killed $d2 def $q2
+; SHA3-NEXT:    // kill: def $d1 killed $d1 def $q1
+; SHA3-NEXT:    eor3 v0.16b, v1.16b, v2.16b, v0.16b
+; SHA3-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; SHA3-NEXT:    ret
+;
+; NOSHA3-LABEL: eor3_2x32:
+; NOSHA3:       // %bb.0:
+; NOSHA3-NEXT:    eor v1.8b, v1.8b, v2.8b
+; NOSHA3-NEXT:    eor v0.8b, v1.8b, v0.8b
+; NOSHA3-NEXT:    ret
+;
+; SVE2-LABEL: eor3_2x32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    eor3 z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT:    fmov d0, d1
+; SVE2-NEXT:    ret
+;
+; SHA3-SVE2-LABEL: eor3_2x32:
+; SHA3-SVE2:       // %bb.0:
+; SHA3-SVE2-NEXT:    // kill: def $d0 killed $d0 def $q0
+; SHA3-SVE2-NEXT:    // kill: def $d2 killed $d2 def $q2
+; SHA3-SVE2-NEXT:    // kill: def $d1 killed $d1 def $q1
+; SHA3-SVE2-NEXT:    eor3 v0.16b, v1.16b, v2.16b, v0.16b
+; SHA3-SVE2-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; SHA3-SVE2-NEXT:    ret
+  %4 = xor <2 x i32> %1, %2
+  %5 = xor <2 x i32> %4, %0
+  ret <2 x i32> %5
+}
+
+define <4 x i16> @eor3_4x16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) {
+; SHA3-LABEL: eor3_4x16:
+; SHA3:       // %bb.0:
+; SHA3-NEXT:    // kill: def $d0 killed $d0 def $q0
+; SHA3-NEXT:    // kill: def $d2 killed $d2 def $q2
+; SHA3-NEXT:    // kill: def $d1 killed $d1 def $q1
+; SHA3-NEXT:    eor3 v0.16b, v1.16b, v2.16b, v0.16b
+; SHA3-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; SHA3-NEXT:    ret
+;
+; NOSHA3-LABEL: eor3_4x16:
+; NOSHA3:       // %bb.0:
+; NOSHA3-NEXT:    eor v1.8b, v1.8b, v2.8b
+; NOSHA3-NEXT:    eor v0.8b, v1.8b, v0.8b
+; NOSHA3-NEXT:    ret
+;
+; SVE2-LABEL: eor3_4x16:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    eor3 z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT:    fmov d0, d1
+; SVE2-NEXT:    ret
+;
+; SHA3-SVE2-LABEL: eor3_4x16:
+; SHA3-SVE2:       // %bb.0:
+; SHA3-SVE2-NEXT:    // kill: def $d0 killed $d0 def $q0
+; SHA3-SVE2-NEXT:    // kill: def $d2 killed $d2 def $q2
+; SHA3-SVE2-NEXT:    // kill: def $d1 killed $d1 def $q1
+; SHA3-SVE2-NEXT:    eor3 v0.16b, v1.16b, v2.16b, v0.16b
+; SHA3-SVE2-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; SHA3-SVE2-NEXT:    ret
+  %4 = xor <4 x i16> %1, %2
+  %5 = xor <4 x i16> %4, %0
+  ret <4 x i16> %5
+}
+
+define <8 x i8> @eor3_8x8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) {
+; SHA3-LABEL: eor3_8x8:
+; SHA3:       // %bb.0:
+; SHA3-NEXT:    // kill: def $d0 killed $d0 def $q0
+; SHA3-NEXT:    // kill: def $d2 killed $d2 def $q2
+; SHA3-NEXT:    // kill: def $d1 killed $d1 def $q1
+; SHA3-NEXT:    eor3 v0.16b, v1.16b, v2.16b, v0.16b
+; SHA3-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; SHA3-NEXT:    ret
+;
+; NOSHA3-LABEL: eor3_8x8:
+; NOSHA3:       // %bb.0:
+; NOSHA3-NEXT:    eor v1.8b, v1.8b, v2.8b
+; NOSHA3-NEXT:    eor v0.8b, v1.8b, v0.8b
+; NOSHA3-NEXT:    ret
+;
+; SVE2-LABEL: eor3_8x8:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    eor3 z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT:    fmov d0, d1
+; SVE2-NEXT:    ret
+;
+; SHA3-SVE2-LABEL: eor3_8x8:
+; SHA3-SVE2:       // %bb.0:
+; SHA3-SVE2-NEXT:    // kill: def $d0 killed $d0 def $q0
+; SHA3-SVE2-NEXT:    // kill: def $d2 killed $d2 def $q2
+; SHA3-SVE2-NEXT:    // kill: def $d1 killed $d1 def $q1
+; SHA3-SVE2-NEXT:    eor3 v0.16b, v1.16b, v2.16b, v0.16b
+; SHA3-SVE2-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; SHA3-SVE2-NEXT:    ret
+  %4 = xor <8 x i8> %1, %2
+  %5 = xor <8 x i8> %4, %0
+  ret <8 x i8> %5
+}
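
The tests added above only cover the 64-bit vector forms; the 128-bit xor chains are exercised by the existing checks in eor3.ll. For reference, a minimal IR sketch of the 128-bit case follows (the function name is illustrative and not part of the patch). Based on the intent of disable-xor-merge, an input like this should keep the two-instruction eor sequence on neoverse-v1, neoverse-v2 and neoverse-n2, while a generic -mattr=+sha3 target should still fold it into a single eor3.

; Illustrative only, not part of the patch. Run through llc with the flags used
; in the RUN lines above, e.g. -mtriple=aarch64 -mattr=+sha3,+sve2 -mcpu=neoverse-v1.
define <2 x i64> @eor3_2x64_example(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
  %4 = xor <2 x i64> %1, %2        ; first xor of the chain
  %5 = xor <2 x i64> %4, %0        ; second xor, the candidate for EOR3 folding
  ret <2 x i64> %5
}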
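Similarly, the [{ ... }] predicate added to the AArch64eor3 PatFrag in AArch64SVEInstrInfo.td only gates the plain xor alternative; the int_aarch64_sve_eor3 intrinsic is unaffected. A hedged sketch of a scalable-vector input that would go through that PatFrag is below; the function name is hypothetical, and the expected outcome (two eor z instructions on cores with disable-xor-merge, one eor3 z otherwise) is inferred from the predicate rather than from checks present in this diff.

; Illustrative only, not part of the patch. Assumes an SVE2-enabled invocation
; such as llc -mtriple=aarch64 -mattr=+sve2 -mcpu=neoverse-v1.
define <vscale x 2 x i64> @eor3_sve_example(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
  %4 = xor <vscale x 2 x i64> %1, %2   ; matched by the xor alternative of AArch64eor3
  %5 = xor <vscale x 2 x i64> %4, %0   ; merged into EOR3 only when disableXorMerge() is false
  ret <vscale x 2 x i64> %5
}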