Skip to content

Commit

Permalink
[PPC CodeGen] Expand the bitreverse.i32 intrinsic.
Browse files Browse the repository at this point in the history
  • Loading branch information
Tony Jiang committed Jul 7, 2017
1 parent 6687318 commit c260e0e
Show file tree
Hide file tree
Showing 5 changed files with 180 additions and 23 deletions.
3 changes: 3 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Expand Up @@ -136,6 +136,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
}

// Match BITREVERSE to customized fast code sequence in the td file.
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);

// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
Expand Down
68 changes: 68 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.td
Expand Up @@ -4454,3 +4454,71 @@ def MSGSYNC : XForm_0<31, 886, (outs), (ins), "msgsync", IIC_SprMSGSYNC, []>;
def STOP : XForm_0<19, 370, (outs), (ins), "stop", IIC_SprSTOP, []>;

} // IsISA3_0

// Fast 32-bit reverse bits algorithm:
// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit):
// n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA);
// Step 2: 2-bit swap (swap odd 2-bit and even 2-bit):
// n = ((n >> 2) & 0x33333333) | ((n << 2) & 0xCCCCCCCC);
// Step 3: 4-bit swap (swap odd 4-bit and even 4-bit):
// n = ((n >> 4) & 0x0F0F0F0F) | ((n << 4) & 0xF0F0F0F0);
// Step 4: byte reverse (Suppose n = [B1,B2,B3,B4]):
// Step 4.1: Put B4,B2 in the right position (rotate left 3 bytes):
// n' = (n rotl 24); After which n' = [B4, B1, B2, B3]
// Step 4.2: Insert B3 to the right position:
// n' = rlwimi n', n, 8, 8, 15; After which n' = [B4, B3, B2, B3]
// Step 4.3: Insert B1 to the right position:
// n' = rlwimi n', n, 8, 24, 31; After which n' = [B4, B3, B2, B1]
// 32-bit mask constants used by the three swap steps described above.
// Each constant is materialized with two instructions: LIS supplies the
// upper 16 bits and ORI fills in the lower 16 bits.
// LoN is ANDed with the value shifted right by N bits, HiN with the value
// shifted left by N bits.
def MaskValues {
dag Lo1 = (ORI (LIS 0x5555), 0x5555); // 0x55555555
dag Hi1 = (ORI (LIS 0xAAAA), 0xAAAA); // 0xAAAAAAAA
dag Lo2 = (ORI (LIS 0x3333), 0x3333); // 0x33333333
dag Hi2 = (ORI (LIS 0xCCCC), 0xCCCC); // 0xCCCCCCCC
dag Lo4 = (ORI (LIS 0x0F0F), 0x0F0F); // 0x0F0F0F0F
dag Hi4 = (ORI (LIS 0xF0F0), 0xF0F0); // 0xF0F0F0F0
}

// Step 1 operands: the pattern input $A shifted by one bit in each
// direction, written as rotate-left-and-mask (RLWINM) operations.
def Shift1 {
dag Right = (RLWINM $A, 31, 1, 31); // n >> 1 (rotate left 31, keep bits 1-31)
dag Left = (RLWINM $A, 1, 0, 30); // n << 1 (rotate left 1, keep bits 0-30)
}

// Step 1 result: ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA),
// i.e. every pair of adjacent bits swapped.
def Swap1 {
dag Bit = (OR (AND Shift1.Right, MaskValues.Lo1),
(AND Shift1.Left, MaskValues.Hi1));
}

// Step 2 operands: the step 1 result shifted by two bits in each direction.
def Shift2 {
dag Right = (RLWINM Swap1.Bit, 30, 2, 31); // n >> 2 (rotate left 30, keep bits 2-31)
dag Left = (RLWINM Swap1.Bit, 2, 0, 29); // n << 2 (rotate left 2, keep bits 0-29)
}

// Step 2 result: ((n >> 2) & 0x33333333) | ((n << 2) & 0xCCCCCCCC),
// i.e. every pair of adjacent 2-bit groups swapped.
def Swap2 {
dag Bits = (OR (AND Shift2.Right, MaskValues.Lo2),
(AND Shift2.Left, MaskValues.Hi2));
}

// Step 3 operands: the step 2 result shifted by four bits in each direction.
def Shift4 {
dag Right = (RLWINM Swap2.Bits, 28, 4, 31); // n >> 4 (rotate left 28, keep bits 4-31)
dag Left = (RLWINM Swap2.Bits, 4, 0, 27); // n << 4 (rotate left 4, keep bits 0-27)
}

// Step 3 result: ((n >> 4) & 0x0F0F0F0F) | ((n << 4) & 0xF0F0F0F0),
// i.e. the two nibbles of every byte swapped. After this step the bits
// inside each byte are reversed; only the byte order remains to be fixed.
def Swap4 {
dag Bits = (OR (AND Shift4.Right, MaskValues.Lo4),
(AND Shift4.Left, MaskValues.Hi4));
}

// Step 4.1: rotate the nibble-swapped value left by 24 bits with a full
// 0-31 mask, so [B1,B2,B3,B4] becomes [B4,B1,B2,B3]. This places B4 and B2
// in their final positions.
def Rotate {
dag Left3Bytes = (RLWINM Swap4.Bits, 24, 0, 31);
}

// Step 4.2: rotate the nibble-swapped value left by 8 and insert bits 8-15
// of it into the step 4.1 result, giving [B4,B3,B2,B3].
def RotateInsertByte3 {
dag Left = (RLWIMI Rotate.Left3Bytes, Swap4.Bits, 8, 8, 15);
}

// Step 4.3: rotate the nibble-swapped value left by 8 and insert bits 24-31
// of it into the step 4.2 result, giving the fully byte-reversed value
// [B4,B3,B2,B1].
def RotateInsertByte1 {
dag Left = (RLWIMI RotateInsertByte3.Left, Swap4.Bits, 8, 24, 31);
}

// Select (bitreverse i32) as the shift/mask/rotate-insert sequence built
// from the dag fragments above.
//
// RLDICL_32 (rldicl) is a 64-bit instruction, so it may only be used in
// 64-bit mode, where it clears the upper 32 bits of the 64-bit register
// holding the result. In 32-bit mode the RLWIMI result is already the final
// value, and emitting rldicl would produce an instruction the target cannot
// execute.
let Predicates = [In64BitMode] in
def : Pat<(i32 (bitreverse i32:$A)),
(RLDICL_32 RotateInsertByte1.Left, 0, 32)>;

let Predicates = [In32BitMode] in
def : Pat<(i32 (bitreverse i32:$A)),
(i32 RotateInsertByte1.Left)>;
23 changes: 0 additions & 23 deletions llvm/test/CodeGen/PowerPC/bitreverse.ll

This file was deleted.

67 changes: 67 additions & 0 deletions llvm/test/CodeGen/PowerPC/pr33093.ll
@@ -0,0 +1,67 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s

; Regression test: a 32-bit bit reversal written out by hand as shifts, masks
; and ors (no intrinsic call). The FileCheck assertions below expect the
; rotate-and-insert (rotlwi/rlwimi) byte-reversal sequence in the output.
define zeroext i32 @ReverseBits(i32 zeroext %n) {
; CHECK-LABEL: ReverseBits:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: lis 4, -21846
; CHECK-NEXT: lis 5, 21845
; CHECK-NEXT: slwi 6, 3, 1
; CHECK-NEXT: srwi 3, 3, 1
; CHECK-NEXT: lis 7, -13108
; CHECK-NEXT: lis 8, 13107
; CHECK-NEXT: ori 4, 4, 43690
; CHECK-NEXT: ori 5, 5, 21845
; CHECK-NEXT: lis 10, -3856
; CHECK-NEXT: lis 11, 3855
; CHECK-NEXT: and 3, 3, 5
; CHECK-NEXT: and 4, 6, 4
; CHECK-NEXT: ori 5, 8, 13107
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: ori 4, 7, 52428
; CHECK-NEXT: slwi 9, 3, 2
; CHECK-NEXT: srwi 3, 3, 2
; CHECK-NEXT: and 3, 3, 5
; CHECK-NEXT: and 4, 9, 4
; CHECK-NEXT: ori 5, 11, 3855
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: ori 4, 10, 61680
; CHECK-NEXT: slwi 12, 3, 4
; CHECK-NEXT: srwi 3, 3, 4
; CHECK-NEXT: and 4, 12, 4
; CHECK-NEXT: and 3, 3, 5
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: rotlwi 4, 3, 24
; CHECK-NEXT: rlwimi 4, 3, 8, 8, 15
; CHECK-NEXT: rlwimi 4, 3, 8, 24, 31
; CHECK-NEXT: rldicl 3, 4, 0, 32
; CHECK-NEXT: clrldi 3, 3, 32
; CHECK-NEXT: blr
entry:
; Swap adjacent bits: ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA).
%shr = lshr i32 %n, 1
%and = and i32 %shr, 1431655765
%and1 = shl i32 %n, 1
%shl = and i32 %and1, -1431655766
%or = or i32 %and, %shl
; Swap adjacent 2-bit groups: masks 0x33333333 and 0xCCCCCCCC.
%shr2 = lshr i32 %or, 2
%and3 = and i32 %shr2, 858993459
%and4 = shl i32 %or, 2
%shl5 = and i32 %and4, -858993460
%or6 = or i32 %and3, %shl5
; Swap adjacent nibbles: masks 0x0F0F0F0F and 0xF0F0F0F0.
%shr7 = lshr i32 %or6, 4
%and8 = and i32 %shr7, 252645135
%and9 = shl i32 %or6, 4
%shl10 = and i32 %and9, -252645136
%or11 = or i32 %and8, %shl10
; Reverse the byte order: move each byte to its mirrored position
; (masks 0xFF00 and 0xFF0000 pick the two middle bytes) and or them together.
%shr13 = lshr i32 %or11, 24
%and14 = lshr i32 %or11, 8
%shr15 = and i32 %and14, 65280
%and17 = shl i32 %or11, 8
%shl18 = and i32 %and17, 16711680
%shl21 = shl i32 %or11, 24
%or16 = or i32 %shl21, %shr13
%or19 = or i32 %or16, %shr15
%or22 = or i32 %or19, %shl18
ret i32 %or22
}
42 changes: 42 additions & 0 deletions llvm/test/CodeGen/PowerPC/testBitReverse.ll
@@ -0,0 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
declare i32 @llvm.bitreverse.i32(i32)
; Calls llvm.bitreverse.i32 directly. The FileCheck assertions below expect
; three shift/mask/or swap stages (1-, 2- and 4-bit), then a rotlwi plus two
; rlwimi instructions for the byte reversal, and a final rldicl.
define i32 @testBitReverseIntrinsicI32(i32 %arg) {
; CHECK-LABEL: testBitReverseIntrinsicI32:
; CHECK: # BB#0:
; CHECK-NEXT: lis 4, -21846
; CHECK-NEXT: lis 5, 21845
; CHECK-NEXT: slwi 6, 3, 1
; CHECK-NEXT: srwi 3, 3, 1
; CHECK-NEXT: lis 7, -13108
; CHECK-NEXT: lis 8, 13107
; CHECK-NEXT: ori 4, 4, 43690
; CHECK-NEXT: ori 5, 5, 21845
; CHECK-NEXT: lis 10, -3856
; CHECK-NEXT: lis 11, 3855
; CHECK-NEXT: and 3, 3, 5
; CHECK-NEXT: and 4, 6, 4
; CHECK-NEXT: ori 5, 8, 13107
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: ori 4, 7, 52428
; CHECK-NEXT: slwi 9, 3, 2
; CHECK-NEXT: srwi 3, 3, 2
; CHECK-NEXT: and 3, 3, 5
; CHECK-NEXT: and 4, 9, 4
; CHECK-NEXT: ori 5, 11, 3855
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: ori 4, 10, 61680
; CHECK-NEXT: slwi 12, 3, 4
; CHECK-NEXT: srwi 3, 3, 4
; CHECK-NEXT: and 4, 12, 4
; CHECK-NEXT: and 3, 3, 5
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: rotlwi 4, 3, 24
; CHECK-NEXT: rlwimi 4, 3, 8, 8, 15
; CHECK-NEXT: rlwimi 4, 3, 8, 24, 31
; CHECK-NEXT: rldicl 3, 4, 0, 32
; CHECK-NEXT: blr
%res = call i32 @llvm.bitreverse.i32(i32 %arg)
ret i32 %res
}

0 comments on commit c260e0e

Please sign in to comment.