Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AArch64] Generate AND in place of CSEL for predicated CTTZ
This patch implements a target-specific optimization that replaces the cmp and csel generated for cttz with an and mask. Recommitted with a fix for truncated value sizes. Differential Revision: https://reviews.llvm.org/D123782
- Loading branch information
1 parent
d60ae47
commit 534ea8b
Showing
2 changed files
with
206 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s | ||
|
||
;; Check the transformation | ||
;; CSEL 0, cttz, cc -> AND cttz numbits-1 | ||
;; for cttz in the case of i32 and i64 respectively | ||
|
||
;; Cases for which the optimization takes place | ||
;; i32, "eq" form: the select yields 0 when %x == 0 and cttz(%x) otherwise. | ||
;; The expected output masks the clz result with #0x1f and has no cmp/csel. | ||
define i32 @cttzi32(i32 %x) { | ||
; CHECK-LABEL: cttzi32: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: rbit w8, w0 | ||
; CHECK-NEXT: clz w8, w8 | ||
; CHECK-NEXT: and w0, w8, #0x1f | ||
; CHECK-NEXT: ret | ||
entry: | ||
%0 = call i32 @llvm.cttz.i32(i32 %x, i1 true) | ||
%1 = icmp eq i32 %x, 0 | ||
%2 = select i1 %1, i32 0, i32 %0 | ||
ret i32 %2 | ||
} | ||
|
||
;; i64, "eq" form: the select yields 0 when %x == 0 and cttz(%x) otherwise. | ||
;; The expected output masks the clz result with #0x3f and has no cmp/csel. | ||
define i64 @cttzi64(i64 %x) { | ||
; CHECK-LABEL: cttzi64: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: rbit x8, x0 | ||
; CHECK-NEXT: clz x8, x8 | ||
; CHECK-NEXT: and x0, x8, #0x3f | ||
; CHECK-NEXT: ret | ||
entry: | ||
%0 = call i64 @llvm.cttz.i64(i64 %x, i1 true) | ||
%1 = icmp eq i64 %x, 0 | ||
%2 = select i1 %1, i64 0, i64 %0 | ||
ret i64 %2 | ||
} | ||
|
||
;; i32, "ne" form with swapped select arms: cttz(%x) when %x != 0, else 0. | ||
;; Same selection as the "eq" form, so the expected output is again the | ||
;; and-with-#0x1f sequence. | ||
define i32 @cttzi32ne(i32 %x) { | ||
; CHECK-LABEL: cttzi32ne: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: rbit w8, w0 | ||
; CHECK-NEXT: clz w8, w8 | ||
; CHECK-NEXT: and w0, w8, #0x1f | ||
; CHECK-NEXT: ret | ||
entry: | ||
%0 = call i32 @llvm.cttz.i32(i32 %x, i1 true) | ||
%1 = icmp ne i32 %x, 0 | ||
%2 = select i1 %1, i32 %0, i32 0 | ||
ret i32 %2 | ||
} | ||
|
||
;; i64, "ne" form with swapped select arms: cttz(%x) when %x != 0, else 0. | ||
;; Same selection as the "eq" form, so the expected output is again the | ||
;; and-with-#0x3f sequence. | ||
define i64 @cttzi64ne(i64 %x) { | ||
; CHECK-LABEL: cttzi64ne: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: rbit x8, x0 | ||
; CHECK-NEXT: clz x8, x8 | ||
; CHECK-NEXT: and x0, x8, #0x3f | ||
; CHECK-NEXT: ret | ||
entry: | ||
%0 = call i64 @llvm.cttz.i64(i64 %x, i1 true) | ||
%1 = icmp ne i64 %x, 0 | ||
%2 = select i1 %1, i64 %0, i64 0 | ||
ret i64 %2 | ||
} | ||
|
||
;; i64 cttz guarded by the "eq 0" select, with the result truncated to i32. | ||
;; The expected mask stays #0x3f (taken from the 64-bit cttz, not from the | ||
;; truncated i32 result), applied on the w register. | ||
define i32 @cttztrunc(i64 %x) { | ||
; CHECK-LABEL: cttztrunc: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: rbit x8, x0 | ||
; CHECK-NEXT: clz x8, x8 | ||
; CHECK-NEXT: and w0, w8, #0x3f | ||
; CHECK-NEXT: ret | ||
entry: | ||
%0 = call i64 @llvm.cttz.i64(i64 %x, i1 true) | ||
%1 = icmp eq i64 %x, 0 | ||
%2 = select i1 %1, i64 0, i64 %0 | ||
%3 = trunc i64 %2 to i32 | ||
ret i32 %3 | ||
} | ||
|
||
;; Cases for which the optimization does not take place | ||
;; Negative case: "ne" compare but the arms are NOT swapped, so 0 is selected | ||
;; when %x != 0 and the cttz result when %x == 0. The expected output keeps | ||
;; the cmp + csel. | ||
define i32 @cttzne(i32 %x) { | ||
; CHECK-LABEL: cttzne: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: rbit w8, w0 | ||
; CHECK-NEXT: cmp w0, #0 | ||
; CHECK-NEXT: clz w8, w8 | ||
; CHECK-NEXT: csel w0, wzr, w8, ne | ||
; CHECK-NEXT: ret | ||
entry: | ||
%0 = call i32 @llvm.cttz.i32(i32 %x, i1 true) | ||
%1 = icmp ne i32 %x, 0 | ||
%2 = select i1 %1, i32 0, i32 %0 | ||
ret i32 %2 | ||
} | ||
|
||
;; Negative case: %x is compared against 10 rather than 0. The expected | ||
;; output keeps the cmp (against #10) + csel. | ||
define i32 @cttzxnot0(i32 %x) { | ||
; CHECK-LABEL: cttzxnot0: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: rbit w8, w0 | ||
; CHECK-NEXT: cmp w0, #10 | ||
; CHECK-NEXT: clz w8, w8 | ||
; CHECK-NEXT: csel w0, wzr, w8, eq | ||
; CHECK-NEXT: ret | ||
entry: | ||
%0 = call i32 @llvm.cttz.i32(i32 %x, i1 true) | ||
%1 = icmp eq i32 %x, 10 | ||
%2 = select i1 %1, i32 0, i32 %0 | ||
ret i32 %2 | ||
} | ||
|
||
;; Negative case: the value selected when %x == 0 is the constant 10, not 0. | ||
;; The expected output materializes #10 and keeps the cmp + csel. | ||
define i32 @cttzlhsnot0(i32 %x) { | ||
; CHECK-LABEL: cttzlhsnot0: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: rbit w9, w0 | ||
; CHECK-NEXT: mov w8, #10 | ||
; CHECK-NEXT: clz w9, w9 | ||
; CHECK-NEXT: cmp w0, #0 | ||
; CHECK-NEXT: csel w0, w8, w9, eq | ||
; CHECK-NEXT: ret | ||
entry: | ||
%0 = call i32 @llvm.cttz.i32(i32 %x, i1 true) | ||
%1 = icmp eq i32 %x, 0 | ||
%2 = select i1 %1, i32 10, i32 %0 | ||
ret i32 %2 | ||
} | ||
|
||
;; Negative case: same select-on-zero shape, but the guarded intrinsic is | ||
;; ctlz rather than cttz. The expected output keeps the cmp + csel. | ||
define i32 @notcttz(i32 %x) { | ||
; CHECK-LABEL: notcttz: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: clz w8, w0 | ||
; CHECK-NEXT: cmp w0, #0 | ||
; CHECK-NEXT: csel w0, wzr, w8, eq | ||
; CHECK-NEXT: ret | ||
entry: | ||
%0 = call i32 @llvm.ctlz.i32(i32 %x, i1 true) | ||
%1 = icmp eq i32 %x, 0 | ||
%2 = select i1 %1, i32 0, i32 %0 | ||
ret i32 %2 | ||
} | ||
|
||
;; Negative case: the zero test is on %y while cttz is taken of %x, so the | ||
;; compared value is not the cttz operand. The expected output keeps the | ||
;; cmp (on w1) + csel. | ||
define i32 @cttzlhsnotx(i32 %x, i32 %y) { | ||
; CHECK-LABEL: cttzlhsnotx: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: rbit w8, w0 | ||
; CHECK-NEXT: cmp w1, #0 | ||
; CHECK-NEXT: clz w8, w8 | ||
; CHECK-NEXT: csel w0, wzr, w8, eq | ||
; CHECK-NEXT: ret | ||
entry: | ||
%0 = call i32 @llvm.cttz.i32(i32 %x, i1 true) | ||
%1 = icmp eq i32 %y, 0 | ||
%2 = select i1 %1, i32 0, i32 %0 | ||
ret i32 %2 | ||
} | ||
|
||
declare i32 @llvm.cttz.i32(i32, i1) | ||
|
||
declare i64 @llvm.cttz.i64(i64, i1) | ||
|
||
declare i32 @llvm.ctlz.i32(i32, i1) |