Skip to content

Commit

Permalink
[AArch64][GISel] Legalize non-power-of-two G_CTTZ
Browse files Browse the repository at this point in the history
The main change here is to add a `widenScalarToNextPow2` before the
`clampScalar`, so that non-power-of-two sizes between 32 and 64 are
widened to an s64 count-trailing-zeroes operation.

However, if the legalisation rules are keyed on TypeIdx 0 (the output),
the s65 testcase still crashes. I solved this by flipping the rules
around so that they operate on TypeIdx 1 (the input), with a
`scalarSameSizeAs` at the end to tie index 0 to index 1. This,
incidentally, is how the rules are written for `G_CTLZ`.

Differential Revision: https://reviews.llvm.org/D147602
  • Loading branch information
lenary committed Apr 13, 2023
1 parent 3fe7127 commit 41eba6c
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 18 deletions.
5 changes: 3 additions & 2 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -699,8 +699,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)

getActionDefinitionsBuilder(G_CTTZ)
.lowerIf(isVector(0))
.clampScalar(0, s32, s64)
.scalarSameSizeAs(1, 0)
.widenScalarToNextPow2(1, /*Min=*/32)
.clampScalar(1, s32, s64)
.scalarSameSizeAs(0, 1)
.legalIf([=](const LegalityQuery &Query) {
return (HasCSSC && typeInSet(0, {s32, s64})(Query));
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ body: |
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[OR]]
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32)
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
; CHECK-CSSC-LABEL: name: s8
; CHECK-CSSC: liveins: $w0
Expand All @@ -26,8 +25,7 @@ body: |
; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32)
; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32)
; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-CSSC-NEXT: $w0 = COPY [[CTTZ]](s32)
; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
%val:_(s8) = G_IMPLICIT_DEF
%cttz:_(s8) = G_CTTZ_ZERO_UNDEF %val(s8)
Expand All @@ -50,8 +48,7 @@ body: |
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[OR]]
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32)
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
; CHECK-CSSC-LABEL: name: s16
; CHECK-CSSC: liveins: $w0
Expand All @@ -60,8 +57,7 @@ body: |
; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536
; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32)
; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32)
; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-CSSC-NEXT: $w0 = COPY [[CTTZ]](s32)
; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
%val:_(s16) = G_IMPLICIT_DEF
%cttz:_(s16) = G_CTTZ_ZERO_UNDEF %val(s16)
Expand Down
113 changes: 105 additions & 8 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ body: |
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[OR]]
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32)
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
; CHECK-CSSC-LABEL: name: s8
; CHECK-CSSC: liveins: $w0
Expand All @@ -27,8 +26,7 @@ body: |
; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32)
; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32)
; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-CSSC-NEXT: $w0 = COPY [[CTTZ]](s32)
; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
%val:_(s8) = G_IMPLICIT_DEF
%cttz:_(s8) = G_CTTZ %val(s8)
Expand All @@ -51,8 +49,7 @@ body: |
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[OR]]
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32)
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
; CHECK-CSSC-LABEL: name: s16
; CHECK-CSSC: liveins: $w0
Expand All @@ -61,8 +58,7 @@ body: |
; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536
; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32)
; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32)
; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-CSSC-NEXT: $w0 = COPY [[CTTZ]](s32)
; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
%val:_(s16) = G_IMPLICIT_DEF
%cttz:_(s16) = G_CTTZ %val(s16)
Expand Down Expand Up @@ -166,3 +162,104 @@ body: |
RET_ReallyLR implicit $q0
...
---
# s35: G_CTTZ on a non-power-of-two scalar narrower than 64 bits.
# The CHECK lines expect the count to be performed at s64: the s64 input is
# first OR'ed with 34359738368 (1 << 35, i.e. bit 35 set), then counted
# either as G_CTLZ of G_BITREVERSE (CHECK prefix) or with a single s64 G_CTTZ
# (CHECK-CSSC prefix). NOTE(review): the RUN lines are not visible here;
# CHECK-CSSC is presumably the run with the CSSC feature enabled - confirm.
name: s35
alignment: 4
tracksRegLiveness: true
body: |
bb.1:
liveins: $x0
; CHECK-LABEL: name: s35
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[C]]
; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s64) = G_BITREVERSE [[OR]]
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE]](s64)
; CHECK-NEXT: $x0 = COPY [[CTLZ]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
; CHECK-CSSC-LABEL: name: s35
; CHECK-CSSC: liveins: $x0
; CHECK-CSSC-NEXT: {{ $}}
; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368
; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[C]]
; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s64) = G_CTTZ [[OR]](s64)
; CHECK-CSSC-NEXT: $x0 = COPY [[CTTZ]](s64)
; CHECK-CSSC-NEXT: RET_ReallyLR implicit $x0
%1:_(s64) = COPY $x0
%0:_(s35) = G_TRUNC %1(s64)
%2:_(s35) = G_CTTZ %0(s35)
%3:_(s64) = G_ANYEXT %2(s35)
$x0 = COPY %3(s64)
RET_ReallyLR implicit $x0
...
---
# s65: G_CTTZ on a non-power-of-two scalar wider than 64 bits.
# The CHECK lines expect the value to be handled as two s64 halves copied
# from $x0 and $x1. The $x1 half is OR'ed with 2 (bit 1 of that half) and the
# $x0 half with 0. Each half is then counted at s64, either as G_CTLZ of
# G_BITREVERSE (CHECK prefix) or with G_CTTZ (CHECK-CSSC prefix).
# The final G_SELECTs are keyed on "$x0 half == 0": when true they pick the
# $x1-half count plus 64 (G_UADDO/G_UADDE), otherwise the $x0-half count.
# NOTE(review): the RUN lines are not visible here; CHECK-CSSC is presumably
# the run with the CSSC feature enabled - confirm.
name: s65
alignment: 4
tracksRegLiveness: true
body: |
bb.1:
liveins: $x0, $x1
; CHECK-LABEL: name: s65
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[C]]
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[COPY1]], [[C1]]
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]]
; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s64) = G_BITREVERSE [[OR1]]
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE]](s64)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[CTLZ]], [[C2]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C3]]
; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[AND]]
; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:_(s64) = G_BITREVERSE [[OR]]
; CHECK-NEXT: [[CTLZ1:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE1]](s64)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[CTLZ1]]
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDE]], [[C]]
; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
; CHECK-NEXT: $x1 = COPY [[SELECT1]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
; CHECK-CSSC-LABEL: name: s65
; CHECK-CSSC: liveins: $x0, $x1
; CHECK-CSSC-NEXT: {{ $}}
; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-CSSC-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-CSSC-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[C]]
; CHECK-CSSC-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[COPY1]], [[C1]]
; CHECK-CSSC-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]]
; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s64) = G_CTTZ [[OR1]](s64)
; CHECK-CSSC-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
; CHECK-CSSC-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[CTTZ]], [[C2]]
; CHECK-CSSC-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-CSSC-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C3]]
; CHECK-CSSC-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[AND]]
; CHECK-CSSC-NEXT: [[CTTZ1:%[0-9]+]]:_(s64) = G_CTTZ [[OR]](s64)
; CHECK-CSSC-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
; CHECK-CSSC-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[CTTZ1]]
; CHECK-CSSC-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDE]], [[C]]
; CHECK-CSSC-NEXT: $x0 = COPY [[SELECT]](s64)
; CHECK-CSSC-NEXT: $x1 = COPY [[SELECT1]](s64)
; CHECK-CSSC-NEXT: RET_ReallyLR implicit $x0, implicit $x1
%1:_(s64) = COPY $x0
%2:_(s64) = COPY $x1
%3:_(s128) = G_MERGE_VALUES %1(s64), %2(s64)
%0:_(s65) = G_TRUNC %3(s128)
%4:_(s65) = G_CTTZ %0(s65)
%7:_(s128) = G_ANYEXT %4(s65)
%5:_(s64), %6:_(s64) = G_UNMERGE_VALUES %7(s128)
$x0 = COPY %5(s64)
$x1 = COPY %6(s64)
RET_ReallyLR implicit $x0, implicit $x1
...

0 comments on commit 41eba6c

Please sign in to comment.