
[InstCombine] matchRotate - support (uniform) constant rotation amounts (PR46895)

This patch adds handling of rotation patterns with (uniform) constant shift amounts; a follow-up will address support for non-uniform constant vectors.
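For illustration, a rotate expressed as opposing shifts with constant amounts that sum to the bitwidth (as in the updated rotl_i32_constant test below) is now canonicalized to a funnel shift intrinsic:

  ; before: 11 + 21 == 32, so this is a rotate-left by 11
  %shl = shl i32 %x, 11
  %shr = lshr i32 %x, 21
  %r = or i32 %shr, %shl
  ; after InstCombine:
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 11)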

Differential Revision: https://reviews.llvm.org/D87452
RKSimon committed Sep 25, 2020
1 parent 994ef4e commit 9ff9c1d
Showing 5 changed files with 38 additions and 52 deletions.
14 changes: 12 additions & 2 deletions llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2087,8 +2087,6 @@ static Instruction *matchRotate(Instruction &Or) {
   // TODO: Can we reduce the code duplication between this and the related
   // rotate matching code under visitSelect and visitTrunc?
   unsigned Width = Or.getType()->getScalarSizeInBits();
-  if (!isPowerOf2_32(Width))
-    return nullptr;

   // First, find an or'd pair of opposite shifts with the same shifted operand:
   // or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1)
@@ -2110,6 +2108,18 @@ static Instruction *matchRotate(Instruction &Or) {
   // Match the shift amount operands for a rotate pattern. This always matches
   // a subtraction on the R operand.
   auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * {
+    // Check for constant shift amounts that sum to the bitwidth.
+    // TODO: Support non-uniform shift amounts.
+    const APInt *LC, *RC;
+    if (match(L, m_APInt(LC)) && match(R, m_APInt(RC)))
+      if (LC->ult(Width) && RC->ult(Width) && (*LC + *RC) == Width)
+        return L;
+
+    // For non-constant cases we don't support non-pow2 shift masks.
+    // TODO: Is it worth matching urem as well?
+    if (!isPowerOf2_32(Width))
+      return nullptr;
+
     // The shift amount may be masked with negation:
     // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
     Value *X;
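Note that the isPowerOf2_32(Width) bail-out now guards only the variable-amount mask patterns below it; when both shift amounts are constant, any width is accepted provided the amounts sum to it. For example (mirroring the rotr_i42_constant test in rotate.ll):

  ; 31 and 11 are each < 42 and 31 + 11 == 42, so this matches as a rotate
  ; even though 42 is not a power of 2:
  %shl = shl i42 %x, 31
  %shr = lshr i42 %x, 11
  %r = or i42 %shr, %shl
  ; -> %r = call i42 @llvm.fshl.i42(i42 %x, i42 %x, i42 31)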
10 changes: 3 additions & 7 deletions llvm/test/Transforms/InstCombine/bswap.ll
@@ -123,9 +123,7 @@ define i32 @bswap32_and_first(i32 %x) {
 
 define i32 @bswap32_and_first_extra_use(i32 %x) {
 ; CHECK-LABEL: @bswap32_and_first_extra_use(
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 16
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], 16
-; CHECK-NEXT: [[SWAPHALF:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: [[SWAPHALF:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 16)
 ; CHECK-NEXT: [[T:%.*]] = and i32 [[SWAPHALF]], 16711935
 ; CHECK-NEXT: [[BSWAP:%.*]] = call i32 @llvm.bswap.i32(i32 [[X]])
 ; CHECK-NEXT: call void @extra_use(i32 [[T]])
@@ -169,10 +167,8 @@ define i32 @bswap32_shl_first(i32 %x) {
 
 define i32 @bswap32_shl_first_extra_use(i32 %x) {
 ; CHECK-LABEL: @bswap32_shl_first_extra_use(
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 16
-; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X]], 24
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[SHR]], 8
-; CHECK-NEXT: [[T:%.*]] = or i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[SWAPHALF:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 16)
+; CHECK-NEXT: [[T:%.*]] = shl i32 [[SWAPHALF]], 8
 ; CHECK-NEXT: [[BSWAP:%.*]] = call i32 @llvm.bswap.i32(i32 [[X]])
 ; CHECK-NEXT: call void @extra_use(i32 [[T]])
 ; CHECK-NEXT: ret i32 [[BSWAP]]
6 changes: 3 additions & 3 deletions llvm/test/Transforms/InstCombine/fsh.ll
@@ -521,9 +521,9 @@ define i33 @fshr_multi_use(i33 %a) {
 
 define i33 @expanded_fshr_multi_use(i33 %a) {
 ; CHECK-LABEL: @expanded_fshr_multi_use(
-; CHECK-NEXT: [[TMP:%.*]] = lshr i33 [[A:%.*]], 1
-; CHECK-NEXT: [[C:%.*]] = lshr i33 [[A]], 24
-; CHECK-NEXT: [[D:%.*]] = xor i33 [[C]], [[TMP]]
+; CHECK-NEXT: [[B:%.*]] = call i33 @llvm.fshl.i33(i33 [[A:%.*]], i33 [[A]], i33 32)
+; CHECK-NEXT: [[C:%.*]] = lshr i33 [[B]], 23
+; CHECK-NEXT: [[D:%.*]] = xor i33 [[C]], [[B]]
 ; CHECK-NEXT: [[E:%.*]] = and i33 [[D]], 31
 ; CHECK-NEXT: ret i33 [[E]]
 ;
32 changes: 12 additions & 20 deletions llvm/test/Transforms/InstCombine/or-concat.ll
@@ -47,11 +47,9 @@ define <2 x i64> @concat_bswap32_unary_split_vector(<2 x i64> %a0) {
 
 define i64 @concat_bswap32_unary_flip(i64 %a0) {
 ; CHECK-LABEL: @concat_bswap32_unary_flip(
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A0:%.*]], 32
-; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[A0]], 32
-; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
-; CHECK-NEXT: ret i64 [[TMP4]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.fshl.i64(i64 [[A0:%.*]], i64 [[A0]], i64 32)
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT: ret i64 [[TMP2]]
 ;
 %1 = lshr i64 %a0, 32
 %2 = trunc i64 %1 to i32
@@ -67,11 +65,9 @@ define i64 @concat_bswap32_unary_flip(i64 %a0) {
 
 define <2 x i64> @concat_bswap32_unary_flip_vector(<2 x i64> %a0) {
 ; CHECK-LABEL: @concat_bswap32_unary_flip_vector(
-; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[A0:%.*]], <i64 32, i64 32>
-; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[A0]], <i64 32, i64 32>
-; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP3]])
-; CHECK-NEXT: ret <2 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> <i64 32, i64 32>)
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP1]])
+; CHECK-NEXT: ret <2 x i64> [[TMP2]]
 ;
 %1 = lshr <2 x i64> %a0, <i64 32, i64 32>
 %2 = trunc <2 x i64> %1 to <2 x i32>
@@ -162,11 +158,9 @@ define <2 x i64> @concat_bitreverse32_unary_split_vector(<2 x i64> %a0) {
 
 define i64 @concat_bitreverse32_unary_flip(i64 %a0) {
 ; CHECK-LABEL: @concat_bitreverse32_unary_flip(
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A0:%.*]], 32
-; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[A0]], 32
-; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[TMP3]])
-; CHECK-NEXT: ret i64 [[TMP4]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.fshl.i64(i64 [[A0:%.*]], i64 [[A0]], i64 32)
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[TMP1]])
+; CHECK-NEXT: ret i64 [[TMP2]]
 ;
 %1 = lshr i64 %a0, 32
 %2 = trunc i64 %1 to i32
@@ -182,11 +176,9 @@ define i64 @concat_bitreverse32_unary_flip(i64 %a0) {
 
 define <2 x i64> @concat_bitreverse32_unary_flip_vector(<2 x i64> %a0) {
 ; CHECK-LABEL: @concat_bitreverse32_unary_flip_vector(
-; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[A0:%.*]], <i64 32, i64 32>
-; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[A0]], <i64 32, i64 32>
-; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP3]])
-; CHECK-NEXT: ret <2 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> <i64 32, i64 32>)
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP1]])
+; CHECK-NEXT: ret <2 x i64> [[TMP2]]
 ;
 %1 = lshr <2 x i64> %a0, <i64 32, i64 32>
 %2 = trunc <2 x i64> %1 to <2 x i32>
28 changes: 8 additions & 20 deletions llvm/test/Transforms/InstCombine/rotate.ll
@@ -3,16 +3,14 @@
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
-; TODO: Canonicalize rotate by constant to funnel shift intrinsics.
+; Canonicalize rotate by constant to funnel shift intrinsics.
 ; This should help cost modeling for vectorization, inlining, etc.
 ; If a target does not have a rotate instruction, the expansion will
 ; be exactly these same 3 basic ops (shl/lshr/or).
 
 define i32 @rotl_i32_constant(i32 %x) {
 ; CHECK-LABEL: @rotl_i32_constant(
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 11
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], 21
-; CHECK-NEXT: [[R:%.*]] = or i32 [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 11)
 ; CHECK-NEXT: ret i32 [[R]]
 ;
 %shl = shl i32 %x, 11
@@ -23,9 +21,7 @@ define i32 @rotl_i32_constant(i32 %x) {
 
 define i42 @rotr_i42_constant(i42 %x) {
 ; CHECK-LABEL: @rotr_i42_constant(
-; CHECK-NEXT: [[SHL:%.*]] = shl i42 [[X:%.*]], 31
-; CHECK-NEXT: [[SHR:%.*]] = lshr i42 [[X]], 11
-; CHECK-NEXT: [[R:%.*]] = or i42 [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = call i42 @llvm.fshl.i42(i42 [[X:%.*]], i42 [[X]], i42 31)
 ; CHECK-NEXT: ret i42 [[R]]
 ;
 %shl = shl i42 %x, 31
@@ -36,9 +32,7 @@ define i42 @rotr_i42_constant(i42 %x) {
 
 define i8 @rotr_i8_constant_commute(i8 %x) {
 ; CHECK-LABEL: @rotr_i8_constant_commute(
-; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], 5
-; CHECK-NEXT: [[SHR:%.*]] = lshr i8 [[X]], 3
-; CHECK-NEXT: [[R:%.*]] = or i8 [[SHL]], [[SHR]]
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[X]], i8 5)
 ; CHECK-NEXT: ret i8 [[R]]
 ;
 %shl = shl i8 %x, 5
@@ -49,9 +43,7 @@ define i8 @rotr_i8_constant_commute(i8 %x) {
 
 define i88 @rotl_i88_constant_commute(i88 %x) {
 ; CHECK-LABEL: @rotl_i88_constant_commute(
-; CHECK-NEXT: [[SHL:%.*]] = shl i88 [[X:%.*]], 44
-; CHECK-NEXT: [[SHR:%.*]] = lshr i88 [[X]], 44
-; CHECK-NEXT: [[R:%.*]] = or i88 [[SHL]], [[SHR]]
+; CHECK-NEXT: [[R:%.*]] = call i88 @llvm.fshl.i88(i88 [[X:%.*]], i88 [[X]], i88 44)
 ; CHECK-NEXT: ret i88 [[R]]
 ;
 %shl = shl i88 %x, 44
@@ -64,9 +56,7 @@ define i88 @rotl_i88_constant_commute(i88 %x) {
 
 define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) {
 ; CHECK-LABEL: @rotl_v2i16_constant_splat(
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], <i16 1, i16 1>
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[X]], <i16 15, i16 15>
-; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> <i16 1, i16 1>)
 ; CHECK-NEXT: ret <2 x i16> [[R]]
 ;
 %shl = shl <2 x i16> %x, <i16 1, i16 1>
@@ -79,9 +69,7 @@ define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) {
 
 define <2 x i17> @rotr_v2i17_constant_splat(<2 x i17> %x) {
 ; CHECK-LABEL: @rotr_v2i17_constant_splat(
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[X:%.*]], <i17 12, i17 12>
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X]], <i17 5, i17 5>
-; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> <i17 12, i17 12>)
 ; CHECK-NEXT: ret <2 x i17> [[R]]
 ;
 %shl = shl <2 x i17> %x, <i17 12, i17 12>
@@ -90,7 +78,7 @@ define <2 x i17> @rotr_v2i17_constant_splat(<2 x i17> %x) {
 ret <2 x i17> %r
 }
 
-; Allow arbitrary shift constants.
+; TODO: Allow arbitrary shift constants.
 
 define <2 x i32> @rotr_v2i32_constant_nonsplat(<2 x i32> %x) {
 ; CHECK-LABEL: @rotr_v2i32_constant_nonsplat(
