-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[AArch64][GlobalISel] Added support for neon right shifts #170832
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/JoshdRod/shift-fallbacks/01-qshl-fallbacks
Are you sure you want to change the base?
[AArch64][GlobalISel] Added support for neon right shifts #170832
Conversation
Added a G_SQSHLU node, which lowers the LLVM IR intrinsic aarch64_neon_sqshlu to the machine instruction sqshlu. Generated code is slightly less efficient compared to SDAG.
In legalisation, the IR intrinsic is lowered to two GI instructions: a vector right shift (G_VASHR), and a truncate with saturation (G_TRUNC_SSAT_S). The result of the G_VASHR is the operand of the G_TRUNC_SSAT_S. Vectors that are treated as i64/i32 are dealt with in TableGen, so they are not handled here.
In legalisation, the IR intrinsic is lowered to two GI instructions: a vector right shift (G_VASHR), and an unsigned truncate with saturation (G_TRUNC_SSAT_U). The result of the G_VASHR is the operand of the G_TRUNC_SSAT_U.
GlobalISel now legalises sqrshrn into G_TRUNC_SSAT_S(G_SRSHR(vec, shift)).
GlobalISel now legalises sqrshrun to G_TRUNC_SSAT_U(G_SRSHR(vec, shift)).
GlobalISel now lowers uqrshrn to G_TRUNC_USAT_U(G_URSHR(vec, shift)).
GlobalISel now lowers uqshrn to G_TRUNC_USAT_U(G_VLSHR(vec, shift)).
|
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64 Author: Joshua Rodriguez (JoshdRod) ChangesMany neon right shift intrinsics were not supported by GlobalISel, mainly due to a lack of legalisation logic. This logic has now been implemented. Patch is 23.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/170832.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 7d99786830e3d..75354e4098fb4 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -252,6 +252,24 @@ def G_USDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_SQSHLU : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_SRSHR: AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_URSHR: AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -300,6 +318,10 @@ def : GINodeEquiv<G_UDOT, AArch64udot>;
def : GINodeEquiv<G_SDOT, AArch64sdot>;
def : GINodeEquiv<G_USDOT, AArch64usdot>;
+def : GINodeEquiv<G_SQSHLU, AArch64sqshlui>;
+def : GINodeEquiv<G_SRSHR, AArch64srshri>;
+def : GINodeEquiv<G_URSHR, AArch64urshri>;
+
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 1025b2502211a..0c8472b759132 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1857,6 +1857,82 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
case Intrinsic::aarch64_neon_srhadd:
return LowerBinOp(TargetOpcode::G_SAVGCEIL);
+ case Intrinsic::aarch64_neon_sqshrn: {
+ if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+ {
+ // Create right shift instruction. Get v. register the output is written to
+ auto Shr = MIB.buildInstr(AArch64::G_VASHR, {MRI.getType(MI.getOperand(2).getReg())}, {MI.getOperand(2), MI.getOperand(3).getImm()});
+ // Build the narrow intrinsic, taking in the v. register of the shift
+ MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr}); MI.eraseFromParent();
+ }
+ break;
+ }
+ case Intrinsic::aarch64_neon_sqshrun: {
+ if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+ {
+ // Create right shift instruction. Get v. register the output is written to
+ auto Shr = MIB.buildInstr(AArch64::G_VASHR, {MRI.getType(MI.getOperand(2).getReg())}, {MI.getOperand(2), MI.getOperand(3).getImm()});
+ // Build the narrow intrinsic, taking in the v. register of the shift
+ MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
+ MI.eraseFromParent();
+ }
+ break;
+ }
+ case Intrinsic::aarch64_neon_sqrshrn: {
+ if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+ {
+ // Create right shift instruction. Get v. register the output is written to
+ auto Shr = MIB.buildInstr(AArch64::G_SRSHR, {MRI.getType(MI.getOperand(2).getReg())}, {MI.getOperand(2), MI.getOperand(3).getImm()});
+ // Build the narrow intrinsic, taking in the v. register of the shift
+ MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
+ MI.eraseFromParent();
+ }
+ break;
+ }
+ case Intrinsic::aarch64_neon_sqrshrun: {
+ if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+ {
+ // Create right shift instruction. Get v. register the output is written to
+ auto Shr = MIB.buildInstr(AArch64::G_SRSHR, {MRI.getType(MI.getOperand(2).getReg())}, {MI.getOperand(2), MI.getOperand(3).getImm()});
+ // Build the narrow intrinsic, taking in the v. register of the shift
+ MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
+ MI.eraseFromParent();
+ }
+ break;
+ }
+ case Intrinsic::aarch64_neon_uqrshrn: {
+ if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+ {
+ // Create right shift instruction. Get v. register the output is written to
+ auto Shr = MIB.buildInstr(AArch64::G_URSHR, {MRI.getType(MI.getOperand(2).getReg())}, {MI.getOperand(2), MI.getOperand(3).getImm()});
+ // Build the narrow intrinsic, taking in the v. register of the shift
+ MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
+ MI.eraseFromParent();
+ }
+ break;
+ }
+ case Intrinsic::aarch64_neon_uqshrn: {
+ if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+ {
+ // Create right shift instruction. Get v. register the output is written to
+ auto Shr = MIB.buildInstr(AArch64::G_VLSHR, {MRI.getType(MI.getOperand(2).getReg())}, {MI.getOperand(2), MI.getOperand(3).getImm()});
+ // Build the narrow intrinsic, taking in the v. register of the shift
+ MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr}); MI.eraseFromParent();
+ }
+ break;
+ }
+ case Intrinsic::aarch64_neon_sqshlu: {
+ // Check if last operand is constant vector dup
+ auto shiftAmount = isConstantOrConstantSplatVector(*MRI.getVRegDef(MI.getOperand(3).getReg()), MRI);
+ if (shiftAmount) {
+ // If so, create a new intrinsic with the correct shift amount
+ MIB.buildInstr(AArch64::G_SQSHLU, {MI.getOperand(0)}, {MI.getOperand(2)}).addImm(shiftAmount->getSExtValue());
+ MI.eraseFromParent();
+ return true;
+ } else {
+ return false;
+ }
+ }
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 652a31f4e65f2..173d4d9b47b1e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -489,6 +489,12 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
case Intrinsic::aarch64_neon_uqshl:
case Intrinsic::aarch64_neon_sqrshl:
case Intrinsic::aarch64_neon_uqrshl:
+ case Intrinsic::aarch64_neon_sqshrn:
+ case Intrinsic::aarch64_neon_sqshrun:
+ case Intrinsic::aarch64_neon_sqrshrn:
+ case Intrinsic::aarch64_neon_sqrshrun:
+ case Intrinsic::aarch64_neon_uqshrn:
+ case Intrinsic::aarch64_neon_uqrshrn:
case Intrinsic::aarch64_crypto_sha1c:
case Intrinsic::aarch64_crypto_sha1p:
case Intrinsic::aarch64_crypto_sha1m:
@@ -1072,6 +1078,15 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Index needs to be a GPR.
OpRegBankIdx[2] = PMI_FirstGPR;
break;
+ case AArch64::G_SQSHLU:
+ // Destination and source need to be FPRs.
+ OpRegBankIdx[0] = PMI_FirstFPR;
+ OpRegBankIdx[1] = PMI_FirstFPR;
+
+ // Shift Index needs to be a GPR.
+ OpRegBankIdx[2] = PMI_FirstGPR;
+ break;
+
case TargetOpcode::G_INSERT_VECTOR_ELT:
OpRegBankIdx[0] = PMI_FirstFPR;
OpRegBankIdx[1] = PMI_FirstFPR;
diff --git a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
index f33d41b0dd6ef..eb86728e6d22f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
@@ -3,20 +3,11 @@
; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for test_sqrshl_s32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrshl_s64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqshl_s32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqshl_s64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqrshl_s32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqrshl_s64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqshl_s32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqshl_s64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s32
+; CHECK-GI: warning: Instruction selection used fallback path for test_uqadd_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqdmulls_scalar
-
define i32 @test_sqrshl_s32(float noundef %a){
; CHECK-LABEL: test_sqrshl_s32:
; CHECK: // %bb.0: // %entry
@@ -236,3 +227,5 @@ define i64 @test_sqdmulls_scalar(float %A){
%prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %cvt, i32 %cvt)
ret i64 %prod
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index 34843835d284a..df3ca03ddcb62 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -2,73 +2,20 @@
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for sqshlu8b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshlu4h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshlu2s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshlu16b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshlu8h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshlu4s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshlu2d
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshlu1d_constant
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshlu_i64_constant
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshlu_i32_constant
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshrn1s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshrn8b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshrn4h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshrn2s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshrn16b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshrn8h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshrn4s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshrun1s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshrun8b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshrun4h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshrun2s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshrun16b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshrun8h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshrun4s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqrshrn1s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqrshrn8b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqrshrn4h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqrshrn2s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqrshrn16b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqrshrn8h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqrshrn4s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqrshrun1s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqrshrun8b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqrshrun4h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqrshrun2s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqrshrun16b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqrshrun8h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqrshrun4s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for uqrshrn1s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for uqrshrn8b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for uqrshrn4h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for uqrshrn2s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for uqrshrn16b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for uqrshrn8h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for uqrshrn4s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for uqshrn1s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for uqshrn8b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for uqshrn4h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for uqshrn2s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for uqshrn16b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for uqshrn8h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for uqshrn4s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for neon_ushl_vscalar_constant_shift
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for neon_ushl_scalar_constant_shift
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for neon_sshll_vscalar_constant_shift
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for neon_sshll_scalar_constant_shift
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for neon_sshll_scalar_constant_shift_m1
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli8b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli4h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli2s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli1d
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli1d_imm0
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli16b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli8h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli4s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli2d
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sqshlu_zero_shift_amount
+; CHECK-GI: warning: Instruction selection used fallback path for neon_ushl_vscalar_constant_shift
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_ushl_scalar_constant_shift
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_vscalar_constant_shift
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_scalar_constant_shift
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_scalar_constant_shift_m1
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli8b
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli4h
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli2s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli1d
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli1d_imm0
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli16b
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli8h
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli4s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli2d
define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl8b:
@@ -142,23 +89,13 @@ define <1 x i64> @sqshl1d_constant(ptr %A) nounwind {
}
define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: sqshl_scalar:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr x8, [x0]
-; CHECK-SD-NEXT: ldr x9, [x1]
-; CHECK-SD-NEXT: fmov d0, x8
-; CHECK-SD-NEXT: fmov d1, x9
-; CHECK-SD-NEXT: sqshl d0, d0, d1
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sqshl_scalar:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr d0, [x0]
-; CHECK-GI-NEXT: ldr d1, [x1]
-; CHECK-GI-NEXT: sqshl d0, d0, d1
-; CHECK-GI-NEXT: fmov x0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sqshl_scalar:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: sqshl d0, d0, d1
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
%tmp1 = load i64, ptr %A
%tmp2 = load i64, ptr %B
%tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 %tmp2)
@@ -362,23 +299,13 @@ define <1 x i64> @uqshl1d_constant(ptr %A) nounwind {
}
define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: uqshl_scalar:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr x8, [x0]
-; CHECK-SD-NEXT: ldr x9, [x1]
-; CHECK-SD-NEXT: fmov d0, x8
-; CHECK-SD-NEXT: fmov d1, x9
-; CHECK-SD-NEXT: uqshl d0, d0, d1
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: uqshl_scalar:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr d0, [x0]
-; CHECK-GI-NEXT: ldr d1, [x1]
-; CHECK-GI-NEXT: uqshl d0, d0, d1
-; CHECK-GI-NEXT: fmov x0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: uqshl_scalar:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: uqshl d0, d0, d1
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
%tmp1 = load i64, ptr %A
%tmp2 = load i64, ptr %B
%tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 %tmp2)
@@ -938,23 +865,13 @@ define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind {
}
define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: sqrshl_scalar:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr x8, [x0]
-; CHECK-SD-NEXT: ldr x9, [x1]
-; CHECK-SD-NEXT: fmov d0, x8
-; CHECK-SD-NEXT: fmov d1, x9
-; CHECK-SD-NEXT: sqrshl d0, d0, d1
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sqrshl_scalar:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr d0, [x0]
-; CHECK-GI-NEXT: ldr d1, [x1]
-; CHECK-GI-NEXT: sqrshl d0, d0, d1
-; CHECK-GI-NEXT: fmov x0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sqrshl_scalar:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: sqrshl d0, d0, d1
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
%tmp1 = load i64, ptr %A
%tmp2 = load i64, ptr %B
%tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 %tmp2)
@@ -964,10 +881,9 @@ define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind {
define i64 @sqrshl_scalar_constant(ptr %A) nounwind {
; CHECK-SD-LABEL: sqrshl_scalar_constant:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr x9, [x0]
-; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: mov x8, #1 // =0x1
+; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: fmov d1, x8
-; CHECK-SD-NEXT: fmov d0, x9
; CHECK-SD-NEXT: sqrshl d0, d0, d1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
@@ -1064,23 +...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
Many neon right shift intrinsics were not supported by GlobalISel, mainly due to a lack of legalisation logic. This logic has now been implemented.