diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index 75354e4098fb4..48aee9ce7344b 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -270,6 +270,18 @@ def G_URSHR: AArch64GenericInstruction { let hasSideEffects = 0; } +def G_SLI: AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type0:$src2, type1:$src3); + let hasSideEffects = 0; +} + +def G_SRI: AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type0:$src2, type1:$src3); + let hasSideEffects = 0; +} + // Generic instruction for the BSP pseudo. It is expanded into BSP, which // expands into BSL/BIT/BIF after register allocation. def G_BSP : AArch64GenericInstruction { @@ -322,6 +334,9 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; + def : GINodeEquiv; def : GINodeEquiv; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 8951ccfbd3352..a430c81134560 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1949,6 +1949,20 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, return false; } } + case Intrinsic::aarch64_neon_vsli: { + MIB.buildInstr( + AArch64::G_SLI, {MI.getOperand(0)}, + {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()}); + MI.eraseFromParent(); + break; + } + case Intrinsic::aarch64_neon_vsri: { + MIB.buildInstr( + AArch64::G_SRI, {MI.getOperand(0)}, + {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()}); + MI.eraseFromParent(); + break; + } case Intrinsic::aarch64_neon_abs: { // Lower the intrinsic to G_ABS. MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)}); @@ -2598,4 +2612,4 @@ bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI, MRI.replaceRegWith(Dst, Fin); MI.eraseFromParent(); return true; -} \ No newline at end of file +} diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index 84bc3f1e14a7a..44f8fd8ad6d35 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -575,6 +575,8 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, case TargetOpcode::G_LROUND: case TargetOpcode::G_LLROUND: case AArch64::G_PMULL: + case AArch64::G_SLI: + case AArch64::G_SRI: return true; case TargetOpcode::G_INTRINSIC: switch (cast(MI).getIntrinsicID()) { @@ -613,6 +615,8 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, case TargetOpcode::G_INSERT_VECTOR_ELT: case TargetOpcode::G_BUILD_VECTOR: case TargetOpcode::G_BUILD_VECTOR_TRUNC: + case AArch64::G_SLI: + case AArch64::G_SRI: return true; case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: switch (cast(MI).getIntrinsicID()) { diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll index a316a4bc543b5..29c06b8fa228c 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll @@ -2,16 +2,6 @@ ; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for sli8b -; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli4h -; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli2s -; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli1d -; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli1d_imm0 -; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli16b -; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli8h -; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli4s -; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli2d - define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: sqshl8b: ; CHECK: // %bb.0: @@ -4288,6 +4278,110 @@ declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounw declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone +define <8 x i8> @sri8b(ptr %A, ptr %B) nounwind { +; CHECK-LABEL: sri8b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: sri v0.8b, v1.8b, #1 +; CHECK-NEXT: ret + %tmp1 = load <8 x i8>, ptr %A + %tmp2 = load <8 x i8>, ptr %B + %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @sri4h(ptr %A, ptr %B) nounwind { +; CHECK-LABEL: sri4h: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: sri v0.4h, v1.4h, #1 +; CHECK-NEXT: ret + %tmp1 = load <4 x i16>, ptr %A + %tmp2 = load <4 x i16>, ptr %B + %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @sri2s(ptr %A, ptr %B) nounwind { +; CHECK-LABEL: sri2s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: sri v0.2s, v1.2s, #1 +; CHECK-NEXT: ret + %tmp1 = load <2 x i32>, ptr %A + %tmp2 = load <2 x i32>, ptr %B + %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @sri1d(ptr %A, ptr %B) nounwind { +; CHECK-LABEL: sri1d: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: sri d0, d1, #1 +; CHECK-NEXT: ret + %tmp1 = load <1 x i64>, ptr %A + %tmp2 = load <1 x i64>, ptr %B + %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1) + ret <1 x i64> %tmp3 +} + +define <16 x i8> @sri16b(ptr %A, ptr %B) nounwind { +; CHECK-LABEL: sri16b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: sri v0.16b, v1.16b, #1 +; CHECK-NEXT: ret + %tmp1 = load <16 x i8>, ptr %A + %tmp2 = load <16 x i8>, ptr %B + %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @sri8h(ptr %A, ptr %B) nounwind { +; CHECK-LABEL: sri8h: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: sri v0.8h, v1.8h, #1 +; CHECK-NEXT: ret + %tmp1 = load <8 x i16>, ptr %A + %tmp2 = load <8 x i16>, ptr %B + %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @sri4s(ptr %A, ptr %B) nounwind { +; CHECK-LABEL: sri4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: sri v0.4s, v1.4s, #1 +; CHECK-NEXT: ret + %tmp1 = load <4 x i32>, ptr %A + %tmp2 = load <4 x i32>, ptr %B + %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @sri2d(ptr %A, ptr %B) nounwind { +; CHECK-LABEL: sri2d: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: sri v0.2d, v1.2d, #1 +; CHECK-NEXT: ret + %tmp1 = load <2 x i64>, ptr %A + %tmp2 = load <2 x i64>, ptr %B + %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1) + ret <2 x i64> %tmp3 +} + define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK-SD-LABEL: ashr_v1i64: ; CHECK-SD: // %bb.0: @@ -4532,9 +4626,9 @@ define <4 x i16> @lshr_trunc_v4i64_v4i16(<4 x i64> %a) { ; ; CHECK-GI-LABEL: lshr_trunc_v4i64_v4i16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI270_0 +; CHECK-GI-NEXT: adrp x8, .LCPI278_0 ; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI270_0] +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI278_0] ; CHECK-GI-NEXT: uzp1 v2.4s, v2.4s, v2.4s ; CHECK-GI-NEXT: neg v1.4s, v2.4s ; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s @@ -4573,9 +4667,9 @@ define <4 x i16> @ashr_trunc_v4i64_v4i16(<4 x i64> %a) { ; ; CHECK-GI-LABEL: ashr_trunc_v4i64_v4i16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI272_0 +; CHECK-GI-NEXT: adrp x8, .LCPI280_0 ; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI272_0] +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI280_0] ; CHECK-GI-NEXT: uzp1 v2.4s, v2.4s, v2.4s ; CHECK-GI-NEXT: neg v1.4s, v2.4s ; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s @@ -4613,9 +4707,9 @@ define <4 x i16> @shl_trunc_v4i64_v4i16(<4 x i64> %a) { ; ; CHECK-GI-LABEL: shl_trunc_v4i64_v4i16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI274_0 +; CHECK-GI-NEXT: adrp x8, .LCPI282_0 ; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI274_0] +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI282_0] ; CHECK-GI-NEXT: uzp1 v1.4s, v2.4s, v2.4s ; CHECK-GI-NEXT: xtn v0.4h, v0.4s ; CHECK-GI-NEXT: xtn v1.4h, v1.4s