Skip to content

Commit

Permalink
[InstCombine] Convert or concat to fshl if opposite or concat exists
Browse files Browse the repository at this point in the history
If two 'or' instructions concatenate the same variables in opposite order
and the first 'or' dominates the second one, the second 'or' can be
replaced by an fshl that rotates the result of the first 'or'. This
eliminates a shl and exposes more optimization opportunities for
bswap/bitreverse.
  • Loading branch information
HaohaiWen committed Oct 8, 2023
1 parent 5b3b1bb commit 68ab662
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 2 deletions.
46 changes: 44 additions & 2 deletions llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2727,7 +2727,8 @@ Instruction *InstCombinerImpl::matchBSwapOrBitReverse(Instruction &I,
}

/// Match UB-safe variants of the funnel shift intrinsic.
static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC,
const DominatorTree &DT) {
// TODO: Can we reduce the code duplication between this and the related
// rotate matching code under visitSelect and visitTrunc?
unsigned Width = Or.getType()->getScalarSizeInBits();
Expand Down Expand Up @@ -2832,6 +2833,47 @@ static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
return nullptr;

FShiftArgs = {ShVal0, ShVal1, ShAmt};

} else if (isa<ZExtInst>(Or0) || isa<ZExtInst>(Or1)) {
// If two 'or' instructions concat variables in opposite order, the latter
// one can be safely converted to fshl.
//
// LowHigh = or (shl (zext Low), Width - ZextHighShlAmt), (zext High)
// HighLow = or (shl (zext High), ZextHighShlAmt), (zext Low)
// ->
// HighLow = fshl LowHigh, LowHigh, ZextHighShlAmt
if (!isa<ZExtInst>(Or1))
std::swap(Or0, Or1);

Value *High, *ZextHigh, *Low;
const APInt *ZextHighShlAmt;
if (!match(Or0,
m_OneUse(m_Shl(m_Value(ZextHigh), m_APInt(ZextHighShlAmt)))))
return nullptr;

if (!match(Or1, m_ZExt(m_Value(Low))) ||
!match(ZextHigh, m_ZExt(m_Value(High))))
return nullptr;

unsigned HighSize = High->getType()->getScalarSizeInBits();
unsigned LowSize = Low->getType()->getScalarSizeInBits();
if (*ZextHighShlAmt != LowSize || HighSize + LowSize != Width)
return nullptr;

for (User *U : ZextHigh->users()) {
Value *X, *Y;
if (!match(U, m_Or(m_Value(X), m_Value(Y))))
continue;

if (!isa<ZExtInst>(Y))
std::swap(X, Y);

if (match(X, m_Shl(m_Specific(Or1), m_SpecificInt(HighSize))) &&
match(Y, m_Specific(ZextHigh)) && DT.dominates(U, &Or)) {
FShiftArgs = {U, U, ConstantInt::get(Or0->getType(), *ZextHighShlAmt)};
break;
}
}
}

if (FShiftArgs.empty())
Expand Down Expand Up @@ -3333,7 +3375,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
/*MatchBitReversals*/ true))
return BitOp;

if (Instruction *Funnel = matchFunnelShift(I, *this))
if (Instruction *Funnel = matchFunnelShift(I, *this, DT))
return Funnel;

if (Instruction *Concat = matchOrConcat(I, Builder))
Expand Down
42 changes: 42 additions & 0 deletions llvm/test/Transforms/InstCombine/funnel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,48 @@ define <2 x i64> @fshl_select_vector(<2 x i64> %x, <2 x i64> %y, <2 x i64> %sham
ret <2 x i64> %r
}

; Convert 'or concat' to fshl if opposite 'or concat' exists.

; Scalar case. %xy packs %x into the top 8 bits and %y into the low 24 bits
; of an i32; %yx packs the same two values in the opposite order (%y in the
; top 24 bits, %x in the low 8 bits). The expected output keeps %xy as is and
; replaces %yx with a funnel shift of %xy with itself by 8 (a rotate left by
; 8), so the second shl is no longer needed.
define i32 @fshl_concat(i8 %x, i24 %y, ptr %addr) {
; CHECK-LABEL: @fshl_concat(
; CHECK-NEXT: [[ZEXT_X:%.*]] = zext i8 [[X:%.*]] to i32
; CHECK-NEXT: [[SLX:%.*]] = shl nuw i32 [[ZEXT_X]], 24
; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext i24 [[Y:%.*]] to i32
; CHECK-NEXT: [[XY:%.*]] = or i32 [[SLX]], [[ZEXT_Y]]
; CHECK-NEXT: store i32 [[XY]], ptr [[ADDR:%.*]], align 4
; CHECK-NEXT: [[YX:%.*]] = call i32 @llvm.fshl.i32(i32 [[XY]], i32 [[XY]], i32 8)
; CHECK-NEXT: ret i32 [[YX]]
;
%zext.x = zext i8 %x to i32
; Shift %x past the 24 bits reserved for %y.
%slx = shl nuw i32 %zext.x, 24
%zext.y = zext i24 %y to i32
; First concat: %x high, %y low. The operands are written zext-first here.
%xy = or i32 %zext.y, %slx
; The store uses %xy, so the first concat has a user independent of %yx.
store i32 %xy, ptr %addr, align 4
; Shift %y past the 8 bits reserved for %x.
%sly = shl nuw i32 %zext.y, 8
; Second concat: %y high, %x low. This is the 'or' expected to become fshl.
%yx = or i32 %zext.x, %sly
ret i32 %yx
}

; Vector case of the same pattern, on <2 x i32> with splat shift amounts.
; Each lane of %xy holds %x in the top 8 bits and %y in the low 24 bits; each
; lane of %yx holds the opposite order. The expected output replaces %yx with
; a funnel shift of %xy with itself by a splat of 8.
define <2 x i32> @fshl_concat_vector(<2 x i8> %x, <2 x i24> %y, ptr %addr) {
; CHECK-LABEL: @fshl_concat_vector(
; CHECK-NEXT: [[ZEXT_X:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
; CHECK-NEXT: [[SLX:%.*]] = shl nuw <2 x i32> [[ZEXT_X]], <i32 24, i32 24>
; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext <2 x i24> [[Y:%.*]] to <2 x i32>
; CHECK-NEXT: [[XY:%.*]] = or <2 x i32> [[SLX]], [[ZEXT_Y]]
; CHECK-NEXT: store <2 x i32> [[XY]], ptr [[ADDR:%.*]], align 4
; CHECK-NEXT: [[YX:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[XY]], <2 x i32> [[XY]], <2 x i32> <i32 8, i32 8>)
; CHECK-NEXT: ret <2 x i32> [[YX]]
;
%zext.x = zext <2 x i8> %x to <2 x i32>
; Shift each lane of %x past the 24 bits reserved for %y.
%slx = shl nuw <2 x i32> %zext.x, <i32 24, i32 24>
%zext.y = zext <2 x i24> %y to <2 x i32>
; First concat: %x high, %y low. The operands are written shl-first here.
%xy = or <2 x i32> %slx, %zext.y
; The store uses %xy, so the first concat has a user independent of %yx.
store <2 x i32> %xy, ptr %addr, align 4
; Shift each lane of %y past the 8 bits reserved for %x.
%sly = shl nuw <2 x i32> %zext.y, <i32 8, i32 8>
; Second concat: %y high, %x low. This is the 'or' expected to become fshl.
%yx = or <2 x i32> %sly, %zext.x
ret <2 x i32> %yx
}

; Negative test - an oversized shift in the narrow type would produce the wrong value.

define i8 @unmasked_shlop_unmasked_shift_amount(i32 %x, i32 %y, i32 %shamt) {
Expand Down

0 comments on commit 68ab662

Please sign in to comment.