Skip to content

Commit

Permalink
rev16 instruction is being generated for a half word byte swap on a 3…
Browse files Browse the repository at this point in the history
…2-bit input as a bswap+rotr. This is not true for a 64-bit input.

This patch adds an instruction-selection pattern to the AArch64 backend so that a halfword byte swap on a 64-bit input is selected as the rev16 instruction.

Differential Revision: https://reviews.llvm.org/D122643
  • Loading branch information
bipmis committed Apr 5, 2022
1 parent fe11344 commit edb4520
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 0 deletions.
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Expand Up @@ -2086,6 +2086,10 @@ def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
// A bswap whose result is logically shifted right by 16 (32-bit form) or by
// 32 (64-bit form) is selected as a single REV16Wr / REV32Xr.
// NOTE(review): top16Zero / top32Zero are defined outside this view;
// presumably they only match inputs whose upper 16 / 32 bits are known to be
// zero -- confirm against their definitions.
def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;

// Halfword byte swap on a 64-bit value, written out as shifts and masks:
//   (x >> 8) & 0x00ff00ff00ff00ff  moves the high byte of every 16-bit lane
//                                  into that lane's low byte;
//   (x << 8) & 0xff00ff00ff00ff00  moves the low byte of every 16-bit lane
//                                  into that lane's high byte.
// OR-ing the two halves swaps the bytes inside each 16-bit lane, so the whole
// expression is selected as a single REV16Xr.
def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
(and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
(REV16Xr GPR64:$Rn)>;

//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
Expand Down
23 changes: 23 additions & 0 deletions llvm/test/CodeGen/AArch64/arm64-rev.ll
Expand Up @@ -679,3 +679,26 @@ define void @test_bswap32_narrow(i32* %p0, i16* %p1) nounwind {
ret void
}
declare i32 @gid_tbl_len(...)

; 64-bit REV16 is *not* a swap then a 16-bit rotation:
; 01234567 ->(bswap) 76543210 ->(rotr) 10765432
; 01234567 ->(rev16) 10325476
; Optimize patterns where rev16 can be generated for a 64-bit input.
; Swaps the two bytes inside every 16-bit lane of an i64 using explicit
; shift/mask/or operations. The assertions below expect the whole sequence to
; be emitted as a single `rev16 x0, x0` under both assertion prefixes.
define i64 @test_rev16_x_hwbyteswaps(i64 %a) nounwind {
; CHECK-LABEL: test_rev16_x_hwbyteswaps:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev16 x0, x0
; CHECK-NEXT: ret
;
; GISEL-LABEL: test_rev16_x_hwbyteswaps:
; GISEL: // %bb.0: // %entry
; GISEL-NEXT: rev16 x0, x0
; GISEL-NEXT: ret
entry:
; High byte of each halfword moved down; the mask below is 0x00ff00ff00ff00ff.
%0 = lshr i64 %a, 8
%1 = and i64 %0, 71777214294589695
; Low byte of each halfword moved up; the mask below is 0xff00ff00ff00ff00
; (printed as a signed i64).
%2 = shl i64 %a, 8
%3 = and i64 %2, -71777214294589696
; Combine the two halves to form the byte-swapped halfwords.
%4 = or i64 %1, %3
ret i64 %4
}

0 comments on commit edb4520

Please sign in to comment.