Skip to content

Commit

Permalink
[PowerPC] Implement P10 Byte Reverse Instructions
Browse files Browse the repository at this point in the history
Generate brh, brw and brd instructions for byte-swap operations
on P10, and generate a single instruction (brh) for a 32-bit swap followed
by a 16-bit right shift.

Reviewed By: stefanp

Differential Revision: https://reviews.llvm.org/D140414
  • Loading branch information
lei137 committed Dec 21, 2022
1 parent 3e65ad7 commit 7a7e910
Show file tree
Hide file tree
Showing 7 changed files with 196 additions and 8 deletions.
3 changes: 3 additions & 0 deletions llvm/lib/Target/PowerPC/P10InstrResources.td
Expand Up @@ -1626,6 +1626,9 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY],
// 4 Cycles Permute operations, 1 input operands
def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read],
(instrs
BRD,
BRH, BRH8,
BRW, BRW8,
LVSL,
LVSR,
LXVKQ,
Expand Down
12 changes: 11 additions & 1 deletion llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Expand Up @@ -3936,9 +3936,19 @@ bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {

switch (N->getOpcode()) {
default: break;
case ISD::SRL:
// If we are on P10, we have a pattern for 32-bit (srl (bswap r), 16) that
// uses the BRH instruction.
if (Subtarget->isISA3_1() && N->getValueType(0) == MVT::i32 &&
N->getOperand(0).getOpcode() == ISD::BSWAP) {
auto &OpRight = N->getOperand(1);
ConstantSDNode *SRLConst = dyn_cast<ConstantSDNode>(OpRight);
if (SRLConst && SRLConst->getSExtValue() == 16)
return false;
}
LLVM_FALLTHROUGH;
case ISD::ROTL:
case ISD::SHL:
case ISD::SRL:
case ISD::AND:
case ISD::OR: {
BitPermutationSelector BPS(CurDAG);
Expand Down
19 changes: 12 additions & 7 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Expand Up @@ -452,14 +452,19 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FROUND, MVT::f32, Legal);
}

// PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
// to speed up scalar BSWAP64.
// Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
// instruction xxbrd to speed up scalar BSWAP64.
if (Subtarget.isISA3_1()) {
setOperationAction(ISD::BSWAP, MVT::i32, Legal);
setOperationAction(ISD::BSWAP, MVT::i64, Legal);
} else {
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
setOperationAction(
ISD::BSWAP, MVT::i64,
(Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
}

// CTPOP and CTTZ were introduced in P8 and P9, respectively.
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
if (Subtarget.hasP9Vector() && Subtarget.isPPC64())
setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
else
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
if (Subtarget.isISA3_0()) {
setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrP10.td
Expand Up @@ -1744,6 +1744,21 @@ let Predicates = [IsISA3_1] in {
v2i64:$vB))]>;
def XVTLSBB : XX2_BF3_XO5_XB6_XO9<60, 2, 475, (outs crrc:$BF), (ins vsrc:$XB),
"xvtlsbb $BF, $XB", IIC_VecGeneral, []>;
// brh on 32-bit GPRs (gprc). Selected for a 32-bit byte swap followed by a
// logical right shift of 16, i.e. (srl (bswap x), 16).
def BRH : XForm_11<31, 219, (outs gprc:$RA), (ins gprc:$RS),
"brh $RA, $RS", IIC_IntRotate,
[(set i32:$RA, (srl (bswap i32:$RS), (i32 16)))]>;
// brw on 32-bit GPRs (gprc). Selected for a plain 32-bit byte swap.
def BRW : XForm_11<31, 155, (outs gprc:$RA), (ins gprc:$RS),
"brw $RA, $RS", IIC_IntRotate,
[(set i32:$RA, (bswap i32:$RS))]>;
// Variants of BRH/BRW that operate on the 64-bit register class (g8rc).
// They reuse the same opcode numbers and assembly strings as BRH/BRW, are
// marked isCodeGenOnly, and carry no selection pattern of their own.
let isCodeGenOnly = 1 in {
def BRH8 : XForm_11<31, 219, (outs g8rc:$RA), (ins g8rc:$RS),
"brh $RA, $RS", IIC_IntRotate, []>;
def BRW8 : XForm_11<31, 155, (outs g8rc:$RA), (ins g8rc:$RS),
"brw $RA, $RS", IIC_IntRotate, []>;
}
// brd on 64-bit GPRs (g8rc). Selected for a 64-bit byte swap.
def BRD : XForm_11<31, 187, (outs g8rc:$RA), (ins g8rc:$RS),
"brd $RA, $RS", IIC_IntRotate,
[(set i64:$RA, (bswap i64:$RS))]>;

// The XFormMemOp flag for the following 8 instructions is set on
// the instruction format.
Expand Down
137 changes: 137 additions & 0 deletions llvm/test/CodeGen/PowerPC/p10-bswap.ll
@@ -0,0 +1,137 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck %s

; Check that the brh/brw/brd instructions are generated for the bswap
; intrinsic for register operand on P10 and that the lhbrx/lwbrx/ldbrx
; instructions are generated for memory operand.

declare i16 @llvm.bswap.i16(i16)

; 16-bit bswap of a register argument: a single brh is expected, followed by
; a clrldi before returning.
define zeroext i16 @test_nomem16(i16 zeroext %a) {
; CHECK-LABEL: test_nomem16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: brh r3, r3
; CHECK-NEXT: clrldi r3, r3, 32
; CHECK-NEXT: blr
entry:
%0 = tail call i16 @llvm.bswap.i16(i16 %a)
ret i16 %0
}

declare i32 @llvm.bswap.i32(i32)

; 32-bit bswap of a register argument: a single brw is expected, followed by
; a clrldi before returning.
define zeroext i32 @test_nomem32(i32 zeroext %a) {
; CHECK-LABEL: test_nomem32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: brw r3, r3
; CHECK-NEXT: clrldi r3, r3, 32
; CHECK-NEXT: blr
entry:
%0 = tail call i32 @llvm.bswap.i32(i32 %a)
ret i32 %0
}

; Check that brh and clrldi are produced from a call to @llvm.bswap.i32
; followed by a right shift of 16 (and a zero-extend at the end of the DAG).
define zeroext i32 @test_bswap_shift16(i32 zeroext %a) {
; CHECK-LABEL: test_bswap_shift16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: brh r3, r3
; CHECK-NEXT: clrldi r3, r3, 32
; CHECK-NEXT: blr
entry:
%0 = tail call i32 @llvm.bswap.i32(i32 %a)
%shr = lshr i32 %0, 16
ret i32 %shr
}

; Check that brh is produced from a call to @llvm.bswap.i32
; followed by a right shift of 16.
declare i64 @call_1()
define void @test_bswap_shift16_2() {
; CHECK-LABEL: test_bswap_shift16_2:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: bl call_1@notoc
; CHECK-NEXT: brh r3, r3
; CHECK-NEXT: sth r3, 0(r3)
bb:
switch i32 undef, label %bb1 [
i32 78, label %bb2
]

bb1:
unreachable

bb2:
%i = call i64 @call_1()
%i3 = trunc i64 %i to i32
%i4 = call i32 @llvm.bswap.i32(i32 %i3)
%i5 = lshr i32 %i4, 16
%i6 = trunc i32 %i5 to i16
store i16 %i6, ptr undef, align 2
unreachable
}

; Negative test for the brh pattern: a 32-bit bswap followed by a right shift
; of 18 (not 16) must not be folded into brh. A brw followed by rlwinm is
; expected instead.
define zeroext i32 @test_bswap_shift18(i32 zeroext %a) {
; CHECK-LABEL: test_bswap_shift18:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: brw r3, r3
; CHECK-NEXT: rlwinm r3, r3, 14, 18, 31
; CHECK-NEXT: blr
entry:
%0 = tail call i32 @llvm.bswap.i32(i32 %a)
%shr = lshr i32 %0, 18
ret i32 %shr
}

declare i64 @llvm.bswap.i64(i64)

; 64-bit bswap of a register argument: a single brd is expected.
define i64 @test_nomem64(i64 %a) {
; CHECK-LABEL: test_nomem64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: brd r3, r3
; CHECK-NEXT: blr
entry:
%0 = tail call i64 @llvm.bswap.i64(i64 %a)
ret i64 %0
}

; 16-bit bswap of a value loaded from memory: the byte-reversed load lhbrx is
; expected, with no separate brh.
define i16 @test_mem16(ptr %a) {
; CHECK-LABEL: test_mem16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lhbrx r3, 0, r3
; CHECK-NEXT: blr
entry:
%0 = load i16, ptr %a, align 2
%1 = tail call i16 @llvm.bswap.i16(i16 %0)
ret i16 %1
}

; 32-bit bswap of a value loaded from memory: the byte-reversed load lwbrx is
; expected, with no separate brw.
define i32 @test_mem32(ptr %a) {
; CHECK-LABEL: test_mem32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lwbrx r3, 0, r3
; CHECK-NEXT: blr
entry:
%0 = load i32, ptr %a, align 4
%1 = tail call i32 @llvm.bswap.i32(i32 %0)
ret i32 %1
}

; 64-bit bswap of a value loaded from memory: the byte-reversed load ldbrx is
; expected, with no separate brd.
define i64 @test_mem64(ptr %a) {
; CHECK-LABEL: test_mem64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ldbrx r3, 0, r3
; CHECK-NEXT: blr
entry:
%0 = load i64, ptr %a, align 8
%1 = tail call i64 @llvm.bswap.i64(i64 %0)
ret i64 %1
}

9 changes: 9 additions & 0 deletions llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
Expand Up @@ -423,6 +423,15 @@
# CHECK: xxpermx 6, 63, 21, 34, 2
0x05 0x00 0x00 0x02 0x88 0xdf 0xa8 0x8c

# CHECK: brh 1, 2
0x7c 0x41 0x01 0xb6

# CHECK: brw 1, 2
0x7c 0x41 0x01 0x36

# CHECK: brd 1, 2
0x7c 0x41 0x01 0x76

# CHECK: xxblendvb 6, 63, 21, 34
0x05 0x00 0x00 0x00 0x84 0xdf 0xa8 0x8c

Expand Down
9 changes: 9 additions & 0 deletions llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
Expand Up @@ -613,6 +613,15 @@
# CHECK-LE: xxpermx 6, 63, 21, 34, 2 # encoding: [0x02,0x00,0x00,0x05,
# CHECK-LE-SAME: 0x8c,0xa8,0xdf,0x88]
xxpermx 6, 63, 21, 34, 2
# CHECK-BE: brh 1, 2 # encoding: [0x7c,0x41,0x01,0xb6]
# CHECK-LE: brh 1, 2 # encoding: [0xb6,0x01,0x41,0x7c]
brh 1, 2
# CHECK-BE: brw 1, 2 # encoding: [0x7c,0x41,0x01,0x36]
# CHECK-LE: brw 1, 2 # encoding: [0x36,0x01,0x41,0x7c]
brw 1, 2
# CHECK-BE: brd 1, 2 # encoding: [0x7c,0x41,0x01,0x76]
# CHECK-LE: brd 1, 2 # encoding: [0x76,0x01,0x41,0x7c]
brd 1, 2
# CHECK-BE: xxblendvb 6, 63, 21, 34 # encoding: [0x05,0x00,0x00,0x00,
# CHECK-BE-SAME: 0x84,0xdf,0xa8,0x8c]
# CHECK-LE: xxblendvb 6, 63, 21, 34 # encoding: [0x00,0x00,0x00,0x05,
Expand Down

0 comments on commit 7a7e910

Please sign in to comment.