Skip to content

Commit 6c0154f

Browse files
committed
[AArch64][GlobalISel] Ensure vector types match in shift instructions
This adds legalizations for shifts with mismatched shift types, which can be created from trunc(shift) patterns. This helps reduce the number of fallbacks.
1 parent 23c51f1 commit 6c0154f

File tree

2 files changed

+78
-36
lines changed

2 files changed

+78
-36
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
222222
.clampNumElements(0, v2s64, v2s64)
223223
.moreElementsToNextPow2(0)
224224
.minScalarSameAs(1, 0)
225-
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
225+
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
226+
.minScalarEltSameAsIf(isVector(0), 1, 0)
227+
.maxScalarEltSameAsIf(isVector(0), 1, 0);
226228

227229
getActionDefinitionsBuilder(G_PTR_ADD)
228230
.legalFor({{p0, s64}, {v2p0, v2s64}})

llvm/test/CodeGen/AArch64/arm64-vshift.ll

Lines changed: 75 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,6 @@
101101
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli4s
102102
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli2d
103103
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu_zero_shift_amount
104-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lshr_trunc_v2i64_v2i8
105-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lshr_trunc_v4i64_v4i16
106-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ashr_trunc_v2i64_v2i8
107-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ashr_trunc_v4i64_v4i16
108-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shl_trunc_v4i64_v4i16
109104

110105
define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind {
111106
; CHECK-LABEL: sqshl8b:
@@ -4381,48 +4376,82 @@ define <8 x i16> @signbits_vashr(<8 x i16> %a) {
43814376
}
43824377

43834378
define <2 x i8> @lshr_trunc_v2i64_v2i8(<2 x i64> %a) {
4384-
; CHECK-LABEL: lshr_trunc_v2i64_v2i8:
4385-
; CHECK: // %bb.0:
4386-
; CHECK-NEXT: shrn v0.2s, v0.2d, #16
4387-
; CHECK-NEXT: ret
4379+
; CHECK-SD-LABEL: lshr_trunc_v2i64_v2i8:
4380+
; CHECK-SD: // %bb.0:
4381+
; CHECK-SD-NEXT: shrn v0.2s, v0.2d, #16
4382+
; CHECK-SD-NEXT: ret
4383+
;
4384+
; CHECK-GI-LABEL: lshr_trunc_v2i64_v2i8:
4385+
; CHECK-GI: // %bb.0:
4386+
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
4387+
; CHECK-GI-NEXT: ushr v0.2s, v0.2s, #16
4388+
; CHECK-GI-NEXT: ret
43884389
%b = lshr <2 x i64> %a, <i64 16, i64 16>
43894390
%c = trunc <2 x i64> %b to <2 x i8>
43904391
ret <2 x i8> %c
43914392
}
43924393

43934394
define <4 x i16> @lshr_trunc_v4i64_v4i16(<4 x i64> %a) {
4394-
; CHECK-LABEL: lshr_trunc_v4i64_v4i16:
4395-
; CHECK: // %bb.0:
4396-
; CHECK-NEXT: xtn v1.2s, v1.2d
4397-
; CHECK-NEXT: xtn v0.2s, v0.2d
4398-
; CHECK-NEXT: ushr v1.2s, v1.2s, #8
4399-
; CHECK-NEXT: ushr v0.2s, v0.2s, #8
4400-
; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h
4401-
; CHECK-NEXT: ret
4395+
; CHECK-SD-LABEL: lshr_trunc_v4i64_v4i16:
4396+
; CHECK-SD: // %bb.0:
4397+
; CHECK-SD-NEXT: xtn v1.2s, v1.2d
4398+
; CHECK-SD-NEXT: xtn v0.2s, v0.2d
4399+
; CHECK-SD-NEXT: ushr v1.2s, v1.2s, #8
4400+
; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #8
4401+
; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
4402+
; CHECK-SD-NEXT: ret
4403+
;
4404+
; CHECK-GI-LABEL: lshr_trunc_v4i64_v4i16:
4405+
; CHECK-GI: // %bb.0:
4406+
; CHECK-GI-NEXT: adrp x8, .LCPI270_0
4407+
; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
4408+
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI270_0]
4409+
; CHECK-GI-NEXT: uzp1 v2.4s, v2.4s, v2.4s
4410+
; CHECK-GI-NEXT: neg v1.4s, v2.4s
4411+
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s
4412+
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
4413+
; CHECK-GI-NEXT: ret
44024414
%b = lshr <4 x i64> %a, <i64 8, i64 8, i64 8, i64 8>
44034415
%c = trunc <4 x i64> %b to <4 x i16>
44044416
ret <4 x i16> %c
44054417
}
44064418

44074419
define <2 x i8> @ashr_trunc_v2i64_v2i8(<2 x i64> %a) {
4408-
; CHECK-LABEL: ashr_trunc_v2i64_v2i8:
4409-
; CHECK: // %bb.0:
4410-
; CHECK-NEXT: shrn v0.2s, v0.2d, #16
4411-
; CHECK-NEXT: ret
4420+
; CHECK-SD-LABEL: ashr_trunc_v2i64_v2i8:
4421+
; CHECK-SD: // %bb.0:
4422+
; CHECK-SD-NEXT: shrn v0.2s, v0.2d, #16
4423+
; CHECK-SD-NEXT: ret
4424+
;
4425+
; CHECK-GI-LABEL: ashr_trunc_v2i64_v2i8:
4426+
; CHECK-GI: // %bb.0:
4427+
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
4428+
; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #16
4429+
; CHECK-GI-NEXT: ret
44124430
%b = ashr <2 x i64> %a, <i64 16, i64 16>
44134431
%c = trunc <2 x i64> %b to <2 x i8>
44144432
ret <2 x i8> %c
44154433
}
44164434

44174435
define <4 x i16> @ashr_trunc_v4i64_v4i16(<4 x i64> %a) {
4418-
; CHECK-LABEL: ashr_trunc_v4i64_v4i16:
4419-
; CHECK: // %bb.0:
4420-
; CHECK-NEXT: xtn v1.2s, v1.2d
4421-
; CHECK-NEXT: xtn v0.2s, v0.2d
4422-
; CHECK-NEXT: ushr v1.2s, v1.2s, #8
4423-
; CHECK-NEXT: ushr v0.2s, v0.2s, #8
4424-
; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h
4425-
; CHECK-NEXT: ret
4436+
; CHECK-SD-LABEL: ashr_trunc_v4i64_v4i16:
4437+
; CHECK-SD: // %bb.0:
4438+
; CHECK-SD-NEXT: xtn v1.2s, v1.2d
4439+
; CHECK-SD-NEXT: xtn v0.2s, v0.2d
4440+
; CHECK-SD-NEXT: ushr v1.2s, v1.2s, #8
4441+
; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #8
4442+
; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
4443+
; CHECK-SD-NEXT: ret
4444+
;
4445+
; CHECK-GI-LABEL: ashr_trunc_v4i64_v4i16:
4446+
; CHECK-GI: // %bb.0:
4447+
; CHECK-GI-NEXT: adrp x8, .LCPI272_0
4448+
; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
4449+
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI272_0]
4450+
; CHECK-GI-NEXT: uzp1 v2.4s, v2.4s, v2.4s
4451+
; CHECK-GI-NEXT: neg v1.4s, v2.4s
4452+
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s
4453+
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
4454+
; CHECK-GI-NEXT: ret
44264455
%b = ashr <4 x i64> %a, <i64 8, i64 8, i64 8, i64 8>
44274456
%c = trunc <4 x i64> %b to <4 x i16>
44284457
ret <4 x i16> %c
@@ -4446,12 +4475,23 @@ define <2 x i8> @shl_trunc_v2i64_v2i8(<2 x i64> %a) {
44464475
}
44474476

44484477
define <4 x i16> @shl_trunc_v4i64_v4i16(<4 x i64> %a) {
4449-
; CHECK-LABEL: shl_trunc_v4i64_v4i16:
4450-
; CHECK: // %bb.0:
4451-
; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s
4452-
; CHECK-NEXT: xtn v0.4h, v0.4s
4453-
; CHECK-NEXT: shl v0.4h, v0.4h, #8
4454-
; CHECK-NEXT: ret
4478+
; CHECK-SD-LABEL: shl_trunc_v4i64_v4i16:
4479+
; CHECK-SD: // %bb.0:
4480+
; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s
4481+
; CHECK-SD-NEXT: xtn v0.4h, v0.4s
4482+
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
4483+
; CHECK-SD-NEXT: ret
4484+
;
4485+
; CHECK-GI-LABEL: shl_trunc_v4i64_v4i16:
4486+
; CHECK-GI: // %bb.0:
4487+
; CHECK-GI-NEXT: adrp x8, .LCPI274_0
4488+
; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
4489+
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI274_0]
4490+
; CHECK-GI-NEXT: uzp1 v1.4s, v2.4s, v2.4s
4491+
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
4492+
; CHECK-GI-NEXT: xtn v1.4h, v1.4s
4493+
; CHECK-GI-NEXT: ushl v0.4h, v0.4h, v1.4h
4494+
; CHECK-GI-NEXT: ret
44554495
%b = shl <4 x i64> %a, <i64 8, i64 8, i64 8, i64 8>
44564496
%c = trunc <4 x i64> %b to <4 x i16>
44574497
ret <4 x i16> %c

0 commit comments

Comments
 (0)