[InstCombine] matchFunnelShift - fold or(shl(a,x),lshr(b,sub(bw,x))) -> fshl(a,b,x) iff x < bw (REAPPLIED)

If value tracking can confirm that a shift amount is less than the type bitwidth, then we can more confidently fold general or(shl(a,x),lshr(b,sub(bw,x))) patterns to a funnel/rotate intrinsic without causing bad codegen regressions in the backend (see D89139).

Reapplied after the shift canonicalization in rG02295e6d1a15, which removed the need to flip the shift values.

Differential Revision: https://reviews.llvm.org/D88783
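
For illustration, here is a minimal LLVM IR sketch of the fold (modeled on the fshl_sub_mask test updated below; the function name is invented for this example). Masking the shift amount with 63 lets value tracking prove it is strictly less than the 64-bit width, so the whole or/shl/lshr sequence collapses into one funnel-shift intrinsic:

; Before: %mask is known to lie in [0, 63], i.e. below the bitwidth.
define i64 @fshl_example(i64 %x, i64 %y, i64 %a) {
  %mask = and i64 %a, 63
  %shl = shl i64 %x, %mask
  %sub = sub nuw nsw i64 64, %mask
  %shr = lshr i64 %y, %sub
  %r = or i64 %shl, %shr
  ret i64 %r
}

; After this patch, InstCombine reduces the body to:
;   %r = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %a)
;   ret i64 %r
; The mask disappears because the intrinsic already takes its shift amount modulo the bitwidth.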
RKSimon committed Oct 12, 2020
1 parent fa56623 commit bbf3925
Showing 3 changed files with 21 additions and 35 deletions.
14 changes: 12 additions & 2 deletions llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2053,7 +2053,7 @@ Instruction *InstCombinerImpl::matchBSwap(BinaryOperator &Or) {
}

/// Match UB-safe variants of the funnel shift intrinsic.
-static Instruction *matchFunnelShift(Instruction &Or) {
+static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
// TODO: Can we reduce the code duplication between this and the related
// rotate matching code under visitSelect and visitTrunc?
unsigned Width = Or.getType()->getScalarSizeInBits();
@@ -2100,6 +2100,16 @@ static Instruction *matchFunnelShift(Instruction &Or) {
return L;
}

+  // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
+  // We limit this to X < Width in case the backend re-expands the intrinsic,
+  // and has to reintroduce a shift modulo operation (InstCombine might remove
+  // it after this fold). This still doesn't guarantee that the final codegen
+  // will match this original pattern.
+  if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
+    KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
+    return KnownL.getMaxValue().ult(Width) ? L : nullptr;
+  }

// For non-constant cases, the following patterns currently only work for
// rotation patterns.
// TODO: Add general funnel-shift compatible patterns.
@@ -2593,7 +2603,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Instruction *BSwap = matchBSwap(I))
return BSwap;

-  if (Instruction *Funnel = matchFunnelShift(I))
+  if (Instruction *Funnel = matchFunnelShift(I, *this))
return Funnel;

if (Instruction *Concat = matchOrConcat(I, Builder))
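As a rough counter-example (hypothetical IR, not part of the test changes): when nothing bounds the shift amount, computeKnownBits cannot prove it is below the bitwidth, so the new guard makes matchFunnelShift decline the fold and the or/shl/lshr sequence is left untouched:

; No mask on %a, so KnownBits cannot show %a < 64 and no fshl is formed.
define i64 @no_fold_example(i64 %x, i64 %y, i64 %a) {
  %shl = shl i64 %x, %a
  %sub = sub i64 64, %a
  %shr = lshr i64 %y, %sub
  %r = or i64 %shl, %shr
  ret i64 %r
}
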
18 changes: 3 additions & 15 deletions llvm/test/Transforms/InstCombine/funnel.ll
@@ -168,11 +168,7 @@ define <3 x i36> @fshl_v3i36_constant_nonsplat_undef0(<3 x i36> %x, <3 x i36> %y

define i64 @fshl_sub_mask(i64 %x, i64 %y, i64 %a) {
; CHECK-LABEL: @fshl_sub_mask(
-; CHECK-NEXT: [[MASK:%.*]] = and i64 [[A:%.*]], 63
-; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[X:%.*]], [[MASK]]
-; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i64 64, [[MASK]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i64 [[Y:%.*]], [[SUB]]
-; CHECK-NEXT: [[R:%.*]] = or i64 [[SHL]], [[SHR]]
+; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.fshl.i64(i64 [[X:%.*]], i64 [[Y:%.*]], i64 [[A:%.*]])
; CHECK-NEXT: ret i64 [[R]]
;
%mask = and i64 %a, 63
@@ -187,11 +183,7 @@ define i64 @fshl_sub_mask(i64 %x, i64 %y, i64 %a) {

define i64 @fshr_sub_mask(i64 %x, i64 %y, i64 %a) {
; CHECK-LABEL: @fshr_sub_mask(
-; CHECK-NEXT: [[MASK:%.*]] = and i64 [[A:%.*]], 63
-; CHECK-NEXT: [[SHR:%.*]] = lshr i64 [[X:%.*]], [[MASK]]
-; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i64 64, [[MASK]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[Y:%.*]], [[SUB]]
-; CHECK-NEXT: [[R:%.*]] = or i64 [[SHL]], [[SHR]]
+; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[Y:%.*]], i64 [[X:%.*]], i64 [[A:%.*]])
; CHECK-NEXT: ret i64 [[R]]
;
%mask = and i64 %a, 63
@@ -204,11 +196,7 @@ define i64 @fshr_sub_mask(i64 %x, i64 %y, i64 %a) {

define <2 x i64> @fshr_sub_mask_vector(<2 x i64> %x, <2 x i64> %y, <2 x i64> %a) {
; CHECK-LABEL: @fshr_sub_mask_vector(
-; CHECK-NEXT: [[MASK:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 63>
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i64> [[X:%.*]], [[MASK]]
-; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw <2 x i64> <i64 64, i64 64>, [[MASK]]
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i64> [[Y:%.*]], [[SUB]]
-; CHECK-NEXT: [[R:%.*]] = or <2 x i64> [[SHL]], [[SHR]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> [[Y:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[A:%.*]])
; CHECK-NEXT: ret <2 x i64> [[R]]
;
%mask = and <2 x i64> %a, <i64 63, i64 63>
24 changes: 6 additions & 18 deletions llvm/test/Transforms/InstCombine/rotate.ll
@@ -676,12 +676,8 @@ define i9 @rotateleft_9_neg_mask_wide_amount_commute(i9 %v, i33 %shamt) {

define i64 @rotl_sub_mask(i64 %0, i64 %1) {
; CHECK-LABEL: @rotl_sub_mask(
-; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP1:%.*]], 63
-; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP0:%.*]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw i64 64, [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP0]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[TMP4]]
-; CHECK-NEXT: ret i64 [[TMP7]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.fshl.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
+; CHECK-NEXT: ret i64 [[TMP3]]
;
%3 = and i64 %1, 63
%4 = shl i64 %0, %3
@@ -695,12 +691,8 @@ define i64 @rotl_sub_mask(i64 %0, i64 %1) {

define i64 @rotr_sub_mask(i64 %0, i64 %1) {
; CHECK-LABEL: @rotr_sub_mask(
-; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP1:%.*]], 63
-; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP0:%.*]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw i64 64, [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP0]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[TMP4]]
-; CHECK-NEXT: ret i64 [[TMP7]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
+; CHECK-NEXT: ret i64 [[TMP3]]
;
%3 = and i64 %1, 63
%4 = lshr i64 %0, %3
@@ -712,12 +704,8 @@ define <2 x i64> @rotr_sub_mask_vector(<2 x i64> %0, <2 x i64> %1) {

define <2 x i64> @rotr_sub_mask_vector(<2 x i64> %0, <2 x i64> %1) {
; CHECK-LABEL: @rotr_sub_mask_vector(
-; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP1:%.*]], <i64 63, i64 63>
-; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[TMP0:%.*]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw <2 x i64> <i64 64, i64 64>, [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = shl <2 x i64> [[TMP0]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i64> [[TMP6]], [[TMP4]]
-; CHECK-NEXT: ret <2 x i64> [[TMP7]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> [[TMP0:%.*]], <2 x i64> [[TMP0]], <2 x i64> [[TMP1:%.*]])
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%3 = and <2 x i64> %1, <i64 63, i64 63>
%4 = lshr <2 x i64> %0, %3
