Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,18 @@ def G_URSHR: AArch64GenericInstruction {
let hasSideEffects = 0;
}

// Generic opcode for the NEON SLI (shift left and insert) operation. It takes
// two type0 inputs ($src1, $src2) plus a third operand ($src3) that carries
// the shift amount, and produces a type0 result. Declared free of side
// effects.
def G_SLI : AArch64GenericInstruction {
  let hasSideEffects = 0;
  let OutOperandList = (outs type0:$dst);
  let InOperandList = (ins type0:$src1, type0:$src2, type1:$src3);
}

// Generic opcode for the NEON SRI (shift right and insert) operation. It takes
// two type0 inputs ($src1, $src2) plus a third operand ($src3) that carries
// the shift amount, and produces a type0 result. Declared free of side
// effects.
def G_SRI : AArch64GenericInstruction {
  let hasSideEffects = 0;
  let OutOperandList = (outs type0:$dst);
  let InOperandList = (ins type0:$src1, type0:$src2, type1:$src3);
}

// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
Expand Down Expand Up @@ -322,6 +334,9 @@ def : GINodeEquiv<G_SQSHLU, AArch64sqshlui>;
def : GINodeEquiv<G_SRSHR, AArch64srshri>;
def : GINodeEquiv<G_URSHR, AArch64urshri>;

// Pair the shift-and-insert generic opcodes with their SelectionDAG nodes
// (presumably so the existing vsli/vsri selection patterns can be reused for
// GlobalISel -- confirm against the pattern importer).
def : GINodeEquiv<G_SLI, AArch64vsli>;
def : GINodeEquiv<G_SRI, AArch64vsri>;

def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;

def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;
Expand Down
16 changes: 15 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1949,6 +1949,20 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return false;
}
}
case Intrinsic::aarch64_neon_vsli: {
  // Lower the intrinsic to G_SLI. Operand 0 is the result, operands 2 and 3
  // are the two register inputs, and operand 4 holds the shift amount, which
  // is forwarded as a plain immediate.
  const int64_t ShiftImm = MI.getOperand(4).getImm();
  MIB.buildInstr(AArch64::G_SLI, {MI.getOperand(0)},
                 {MI.getOperand(2), MI.getOperand(3), ShiftImm});
  // The intrinsic has been replaced by the generic instruction built above.
  MI.eraseFromParent();
  break;
}
case Intrinsic::aarch64_neon_vsri: {
  // Lower the intrinsic to G_SRI. Operand 0 is the result, operands 2 and 3
  // are the two register inputs, and operand 4 holds the shift amount, which
  // is forwarded as a plain immediate.
  const int64_t ShiftImm = MI.getOperand(4).getImm();
  MIB.buildInstr(AArch64::G_SRI, {MI.getOperand(0)},
                 {MI.getOperand(2), MI.getOperand(3), ShiftImm});
  // The intrinsic has been replaced by the generic instruction built above.
  MI.eraseFromParent();
  break;
}
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
Expand Down Expand Up @@ -2598,4 +2612,4 @@ bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
MRI.replaceRegWith(Dst, Fin);
MI.eraseFromParent();
return true;
}
}
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,8 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
case TargetOpcode::G_LROUND:
case TargetOpcode::G_LLROUND:
case AArch64::G_PMULL:
case AArch64::G_SLI:
case AArch64::G_SRI:
return true;
case TargetOpcode::G_INTRINSIC:
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
Expand Down Expand Up @@ -613,6 +615,8 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
case TargetOpcode::G_INSERT_VECTOR_ELT:
case TargetOpcode::G_BUILD_VECTOR:
case TargetOpcode::G_BUILD_VECTOR_TRUNC:
case AArch64::G_SLI:
case AArch64::G_SRI:
return true;
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
Expand Down
126 changes: 110 additions & 16 deletions llvm/test/CodeGen/AArch64/arm64-vshift.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,6 @@
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; CHECK-GI: warning: Instruction selection used fallback path for sli8b
; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli4h
; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli2s
; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli1d
; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli1d_imm0
; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli16b
; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli8h
; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli4s
; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli2d

define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl8b:
; CHECK: // %bb.0:
Expand Down Expand Up @@ -4288,6 +4278,110 @@ declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounw
declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone

; vsri on <8 x i8> with shift amount 1: both run lines are expected to emit a
; single SRI whose destination register holds the value loaded from %A.
define <8 x i8> @sri8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sri8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sri v0.8b, v1.8b, #1
; CHECK-NEXT: ret
  %acc = load <8 x i8>, ptr %A
  %ins = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %acc, <8 x i8> %ins, i32 1)
  ret <8 x i8> %res
}

; vsri on <4 x i16> with shift amount 1: both run lines are expected to emit a
; single SRI whose destination register holds the value loaded from %A.
define <4 x i16> @sri4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sri4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sri v0.4h, v1.4h, #1
; CHECK-NEXT: ret
  %acc = load <4 x i16>, ptr %A
  %ins = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %acc, <4 x i16> %ins, i32 1)
  ret <4 x i16> %res
}

; vsri on <2 x i32> with shift amount 1: both run lines are expected to emit a
; single SRI whose destination register holds the value loaded from %A.
define <2 x i32> @sri2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sri2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sri v0.2s, v1.2s, #1
; CHECK-NEXT: ret
  %acc = load <2 x i32>, ptr %A
  %ins = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> %acc, <2 x i32> %ins, i32 1)
  ret <2 x i32> %res
}

; vsri on <1 x i64> with shift amount 1: both run lines are expected to emit
; the scalar d-register form of SRI, with the destination holding the value
; loaded from %A.
define <1 x i64> @sri1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sri1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sri d0, d1, #1
; CHECK-NEXT: ret
  %acc = load <1 x i64>, ptr %A
  %ins = load <1 x i64>, ptr %B
  %res = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %acc, <1 x i64> %ins, i32 1)
  ret <1 x i64> %res
}

; vsri on <16 x i8> with shift amount 1: both run lines are expected to emit a
; single SRI whose destination register holds the value loaded from %A.
define <16 x i8> @sri16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sri16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sri v0.16b, v1.16b, #1
; CHECK-NEXT: ret
  %acc = load <16 x i8>, ptr %A
  %ins = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %acc, <16 x i8> %ins, i32 1)
  ret <16 x i8> %res
}

; vsri on <8 x i16> with shift amount 1: both run lines are expected to emit a
; single SRI whose destination register holds the value loaded from %A.
define <8 x i16> @sri8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sri8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sri v0.8h, v1.8h, #1
; CHECK-NEXT: ret
  %acc = load <8 x i16>, ptr %A
  %ins = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %acc, <8 x i16> %ins, i32 1)
  ret <8 x i16> %res
}

; vsri on <4 x i32> with shift amount 1: both run lines are expected to emit a
; single SRI whose destination register holds the value loaded from %A.
define <4 x i32> @sri4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sri4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sri v0.4s, v1.4s, #1
; CHECK-NEXT: ret
  %acc = load <4 x i32>, ptr %A
  %ins = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> %acc, <4 x i32> %ins, i32 1)
  ret <4 x i32> %res
}

; vsri on <2 x i64> with shift amount 1: both run lines are expected to emit a
; single SRI whose destination register holds the value loaded from %A.
define <2 x i64> @sri2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sri2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sri v0.2d, v1.2d, #1
; CHECK-NEXT: ret
  %acc = load <2 x i64>, ptr %A
  %ins = load <2 x i64>, ptr %B
  %res = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> %acc, <2 x i64> %ins, i32 1)
  ret <2 x i64> %res
}

define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-SD-LABEL: ashr_v1i64:
; CHECK-SD: // %bb.0:
Expand Down Expand Up @@ -4532,9 +4626,9 @@ define <4 x i16> @lshr_trunc_v4i64_v4i16(<4 x i64> %a) {
;
; CHECK-GI-LABEL: lshr_trunc_v4i64_v4i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI270_0
; CHECK-GI-NEXT: adrp x8, .LCPI278_0
; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI270_0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI278_0]
; CHECK-GI-NEXT: uzp1 v2.4s, v2.4s, v2.4s
; CHECK-GI-NEXT: neg v1.4s, v2.4s
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s
Expand Down Expand Up @@ -4573,9 +4667,9 @@ define <4 x i16> @ashr_trunc_v4i64_v4i16(<4 x i64> %a) {
;
; CHECK-GI-LABEL: ashr_trunc_v4i64_v4i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI272_0
; CHECK-GI-NEXT: adrp x8, .LCPI280_0
; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI272_0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI280_0]
; CHECK-GI-NEXT: uzp1 v2.4s, v2.4s, v2.4s
; CHECK-GI-NEXT: neg v1.4s, v2.4s
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s
Expand Down Expand Up @@ -4613,9 +4707,9 @@ define <4 x i16> @shl_trunc_v4i64_v4i16(<4 x i64> %a) {
;
; CHECK-GI-LABEL: shl_trunc_v4i64_v4i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI274_0
; CHECK-GI-NEXT: adrp x8, .LCPI282_0
; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI274_0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI282_0]
; CHECK-GI-NEXT: uzp1 v1.4s, v2.4s, v2.4s
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NEXT: xtn v1.4h, v1.4s
Expand Down
Loading