Skip to content

Commit

Permalink
[GlobalISel][AArch64] Legalize + select some llvm.ctlz.* intrinsics
Browse files Browse the repository at this point in the history
Legalize/select llvm.ctlz.*

Add select-ctlz to show that we actually select them. Update arm64-clrsb.ll and
arm64-vclz.ll to show that we perform valid transformations in optimized builds,
and document where GISel can improve.

Differential Revision: https://reviews.llvm.org/D58155

llvm-svn: 354299
  • Loading branch information
Jessica Paquette committed Feb 18, 2019
1 parent d8b4efc commit b53e0f4
Show file tree
Hide file tree
Showing 6 changed files with 233 additions and 1 deletion.
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Expand Up @@ -2099,6 +2099,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FSIN:
case G_FSQRT:
case G_BSWAP:
case G_CTLZ:
return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
case G_SHL:
case G_LSHR:
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
Expand Up @@ -457,6 +457,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
})
.minScalarSameAs(1, 0);

getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
{s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
.scalarize(1);

computeTables();
verify(*ST.getInstrInfo());
}
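Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir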
Expand Up @@ -322,7 +322,7 @@
# DEBUG: .. type index coverage check SKIPPED: no rules defined
#
# DEBUG-NEXT: G_CTLZ (opcode {{[0-9]+}}): 2 type indices
# DEBUG: .. type index coverage check SKIPPED: no rules defined
# DEBUG: .. the first uncovered type index: 2, OK
#
# DEBUG-NEXT: G_CTLZ_ZERO_UNDEF (opcode {{[0-9]+}}): 2 type indices
# DEBUG: .. type index coverage check SKIPPED: no rules defined
Expand Down
200 changes: 200 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/select-ctlz.mir
@@ -0,0 +1,200 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -O0 -mtriple=arm64-unknown-unknown -global-isel -run-pass=instruction-select %s -o - | FileCheck %s

name: test_v8s8
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0
; CHECK-LABEL: name: test_v8s8
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
; CHECK: [[CLZv8i8_:%[0-9]+]]:fpr64 = CLZv8i8 [[COPY]]
; CHECK: $d0 = COPY [[CLZv8i8_]]
; CHECK: RET_ReallyLR implicit $d0
%0:fpr(<8 x s8>) = COPY $d0
%1:fpr(<8 x s8>) = G_CTLZ %0(<8 x s8>)
$d0 = COPY %1(<8 x s8>)
RET_ReallyLR implicit $d0
...
---
name: test_v4s16
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0
; CHECK-LABEL: name: test_v4s16
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
; CHECK: [[CLZv4i16_:%[0-9]+]]:fpr64 = CLZv4i16 [[COPY]]
; CHECK: $d0 = COPY [[CLZv4i16_]]
; CHECK: RET_ReallyLR implicit $d0
%0:fpr(<4 x s16>) = COPY $d0
%1:fpr(<4 x s16>) = G_CTLZ %0(<4 x s16>)
$d0 = COPY %1(<4 x s16>)
RET_ReallyLR implicit $d0
...
---
name: test_v2s32
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0
; CHECK-LABEL: name: test_v2s32
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
; CHECK: [[CLZv2i32_:%[0-9]+]]:fpr64 = CLZv2i32 [[COPY]]
; CHECK: $d0 = COPY [[CLZv2i32_]]
; CHECK: RET_ReallyLR implicit $d0
%0:fpr(<2 x s32>) = COPY $d0
%1:fpr(<2 x s32>) = G_CTLZ %0(<2 x s32>)
$d0 = COPY %1(<2 x s32>)
RET_ReallyLR implicit $d0
...
---
name: test_s64
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0
; CHECK-LABEL: name: test_s64
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]]
; CHECK: [[CLZXr:%[0-9]+]]:gpr64 = CLZXr [[COPY1]]
; CHECK: $d0 = COPY [[CLZXr]]
; CHECK: RET_ReallyLR implicit $d0
%0:fpr(s64) = COPY $d0
%2:gpr(s64) = COPY %0(s64)
%1:gpr(s64) = G_CTLZ %2(s64)
$d0 = COPY %1(s64)
RET_ReallyLR implicit $d0
...
---
name: test_s32
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $s0
; CHECK-LABEL: name: test_s32
; CHECK: liveins: $s0
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
; CHECK: [[CLZWr:%[0-9]+]]:gpr32 = CLZWr [[COPY1]]
; CHECK: $s0 = COPY [[CLZWr]]
; CHECK: RET_ReallyLR implicit $s0
%0:fpr(s32) = COPY $s0
%2:gpr(s32) = COPY %0(s32)
%1:gpr(s32) = G_CTLZ %2(s32)
$s0 = COPY %1(s32)
RET_ReallyLR implicit $s0
...
---
name: test_v16s8
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $q0
; CHECK-LABEL: name: test_v16s8
; CHECK: liveins: $q0
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
; CHECK: [[CLZv16i8_:%[0-9]+]]:fpr128 = CLZv16i8 [[COPY]]
; CHECK: $q0 = COPY [[CLZv16i8_]]
; CHECK: RET_ReallyLR implicit $q0
%0:fpr(<16 x s8>) = COPY $q0
%1:fpr(<16 x s8>) = G_CTLZ %0(<16 x s8>)
$q0 = COPY %1(<16 x s8>)
RET_ReallyLR implicit $q0
...
---
name: test_v8s16
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $q0
; CHECK-LABEL: name: test_v8s16
; CHECK: liveins: $q0
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
; CHECK: [[CLZv8i16_:%[0-9]+]]:fpr128 = CLZv8i16 [[COPY]]
; CHECK: $q0 = COPY [[CLZv8i16_]]
; CHECK: RET_ReallyLR implicit $q0
%0:fpr(<8 x s16>) = COPY $q0
%1:fpr(<8 x s16>) = G_CTLZ %0(<8 x s16>)
$q0 = COPY %1(<8 x s16>)
RET_ReallyLR implicit $q0
...
---
name: test_v4s32
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $q0
; CHECK-LABEL: name: test_v4s32
; CHECK: liveins: $q0
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
; CHECK: [[CLZv4i32_:%[0-9]+]]:fpr128 = CLZv4i32 [[COPY]]
; CHECK: $q0 = COPY [[CLZv4i32_]]
; CHECK: RET_ReallyLR implicit $q0
%0:fpr(<4 x s32>) = COPY $q0
%1:fpr(<4 x s32>) = G_CTLZ %0(<4 x s32>)
$q0 = COPY %1(<4 x s32>)
RET_ReallyLR implicit $q0
...
---
name: test_v2s64
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $q0
; CHECK-LABEL: name: test_v2s64
; CHECK: liveins: $q0
; CHECK: [[COPY:%[0-9]+]]:fpr(<2 x s64>) = COPY $q0
; CHECK: [[CTLZ:%[0-9]+]]:fpr(<2 x s64>) = G_CTLZ [[COPY]](<2 x s64>)
; CHECK: $q0 = COPY [[CTLZ]](<2 x s64>)
; CHECK: RET_ReallyLR implicit $q0
%0:fpr(<2 x s64>) = COPY $q0
%1:fpr(<2 x s64>) = G_CTLZ %0(<2 x s64>)
$q0 = COPY %1(<2 x s64>)
RET_ReallyLR implicit $q0
10 changes: 10 additions & 0 deletions llvm/test/CodeGen/AArch64/arm64-clrsb.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 -O0 -pass-remarks-missed=gisel* -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL,FALLBACK

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

Expand All @@ -7,6 +8,7 @@ declare i32 @llvm.ctlz.i32(i32, i1) #0
declare i64 @llvm.ctlz.i64(i64, i1) #1

; Function Attrs: nounwind ssp
; FALLBACK-NOT: remark{{.*}}clrsb32
define i32 @clrsb32(i32 %x) #2 {
entry:
%shr = ashr i32 %x, 31
Expand All @@ -18,9 +20,15 @@ entry:
ret i32 %0
; CHECK-LABEL: clrsb32
; CHECK: cls [[TEMP:w[0-9]+]], [[TEMP]]

; FIXME: We should produce the same result here to save some code size. After
; that, we can remove the GISEL special casing.
; GISEL-LABEL: clrsb32
; GISEL: clz
}

; Function Attrs: nounwind ssp
; FALLBACK-NOT: remark{{.*}}clrsb64
define i64 @clrsb64(i64 %x) #3 {
entry:
%shr = ashr i64 %x, 63
Expand All @@ -32,4 +40,6 @@ entry:
ret i64 %0
; CHECK-LABEL: clrsb64
; CHECK: cls [[TEMP:x[0-9]+]], [[TEMP]]
; GISEL-LABEL: clrsb64
; GISEL: cls [[TEMP:x[0-9]+]], [[TEMP]]
}
17 changes: 17 additions & 0 deletions llvm/test/CodeGen/AArch64/arm64-vclz.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=FALLBACK,CHECK

; FALLBACK-NOT: remark{{.*}}test_vclz_u8
define <8 x i8> @test_vclz_u8(<8 x i8> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclz_u8:
; CHECK: clz.8b v0, v0
Expand All @@ -8,6 +10,7 @@ define <8 x i8> @test_vclz_u8(<8 x i8> %a) nounwind readnone ssp {
ret <8 x i8> %vclz.i
}

; FALLBACK-NOT: remark{{.*}}test_vclz_s8
define <8 x i8> @test_vclz_s8(<8 x i8> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclz_s8:
; CHECK: clz.8b v0, v0
Expand All @@ -16,6 +19,7 @@ define <8 x i8> @test_vclz_s8(<8 x i8> %a) nounwind readnone ssp {
ret <8 x i8> %vclz.i
}

; FALLBACK-NOT: remark{{.*}}test_vclz_u16
define <4 x i16> @test_vclz_u16(<4 x i16> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclz_u16:
; CHECK: clz.4h v0, v0
Expand All @@ -24,6 +28,7 @@ define <4 x i16> @test_vclz_u16(<4 x i16> %a) nounwind readnone ssp {
ret <4 x i16> %vclz1.i
}

; FALLBACK-NOT: remark{{.*}}test_vclz_s16
define <4 x i16> @test_vclz_s16(<4 x i16> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclz_s16:
; CHECK: clz.4h v0, v0
Expand All @@ -32,6 +37,7 @@ define <4 x i16> @test_vclz_s16(<4 x i16> %a) nounwind readnone ssp {
ret <4 x i16> %vclz1.i
}

; FALLBACK-NOT: remark{{.*}}test_vclz_u32
define <2 x i32> @test_vclz_u32(<2 x i32> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclz_u32:
; CHECK: clz.2s v0, v0
Expand All @@ -40,6 +46,7 @@ define <2 x i32> @test_vclz_u32(<2 x i32> %a) nounwind readnone ssp {
ret <2 x i32> %vclz1.i
}

; FALLBACK-NOT: remark{{.*}}test_vclz_s32
define <2 x i32> @test_vclz_s32(<2 x i32> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclz_s32:
; CHECK: clz.2s v0, v0
Expand All @@ -48,18 +55,21 @@ define <2 x i32> @test_vclz_s32(<2 x i32> %a) nounwind readnone ssp {
ret <2 x i32> %vclz1.i
}

; FALLBACK-NOT: remark{{.*}}test_vclz_u64
define <1 x i64> @test_vclz_u64(<1 x i64> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclz_u64:
%vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind
ret <1 x i64> %vclz1.i
}

; FALLBACK-NOT: remark{{.*}}test_vclz_s64
define <1 x i64> @test_vclz_s64(<1 x i64> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclz_s64:
%vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind
ret <1 x i64> %vclz1.i
}

; FALLBACK-NOT: remark{{.*}}test_vclzq_u8
define <16 x i8> @test_vclzq_u8(<16 x i8> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclzq_u8:
; CHECK: clz.16b v0, v0
Expand All @@ -68,6 +78,7 @@ define <16 x i8> @test_vclzq_u8(<16 x i8> %a) nounwind readnone ssp {
ret <16 x i8> %vclz.i
}

; FALLBACK-NOT: remark{{.*}}test_vclzq_s8
define <16 x i8> @test_vclzq_s8(<16 x i8> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclzq_s8:
; CHECK: clz.16b v0, v0
Expand All @@ -76,6 +87,7 @@ define <16 x i8> @test_vclzq_s8(<16 x i8> %a) nounwind readnone ssp {
ret <16 x i8> %vclz.i
}

; FALLBACK-NOT: remark{{.*}}test_vclzq_u16
define <8 x i16> @test_vclzq_u16(<8 x i16> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclzq_u16:
; CHECK: clz.8h v0, v0
Expand All @@ -84,6 +96,7 @@ define <8 x i16> @test_vclzq_u16(<8 x i16> %a) nounwind readnone ssp {
ret <8 x i16> %vclz1.i
}

; FALLBACK-NOT: remark{{.*}}test_vclzq_s16
define <8 x i16> @test_vclzq_s16(<8 x i16> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclzq_s16:
; CHECK: clz.8h v0, v0
Expand All @@ -92,6 +105,7 @@ define <8 x i16> @test_vclzq_s16(<8 x i16> %a) nounwind readnone ssp {
ret <8 x i16> %vclz1.i
}

; FALLBACK-NOT: remark{{.*}}test_vclzq_u32
define <4 x i32> @test_vclzq_u32(<4 x i32> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclzq_u32:
; CHECK: clz.4s v0, v0
Expand All @@ -100,6 +114,7 @@ define <4 x i32> @test_vclzq_u32(<4 x i32> %a) nounwind readnone ssp {
ret <4 x i32> %vclz1.i
}

; FALLBACK-NOT: remark{{.*}}test_vclzq_s32
define <4 x i32> @test_vclzq_s32(<4 x i32> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclzq_s32:
; CHECK: clz.4s v0, v0
Expand All @@ -108,12 +123,14 @@ define <4 x i32> @test_vclzq_s32(<4 x i32> %a) nounwind readnone ssp {
ret <4 x i32> %vclz1.i
}

; FALLBACK-NOT: remark{{.*}}test_vclzq_u64
define <2 x i64> @test_vclzq_u64(<2 x i64> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclzq_u64:
%vclz1.i = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) nounwind
ret <2 x i64> %vclz1.i
}

; FALLBACK-NOT: remark{{.*}}test_vclzq_s64
define <2 x i64> @test_vclzq_s64(<2 x i64> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclzq_s64:
%vclz1.i = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) nounwind
Expand Down

0 comments on commit b53e0f4

Please sign in to comment.