Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,15 @@ struct ShiftOfShiftedLogic {
uint64_t ValSum;
};

/// Match data for the (lshr (trunc (lshr x, C1)), C2) combine. Filled in by
/// CombinerHelper::matchLshrOfTruncOfLshr and consumed by
/// CombinerHelper::applyLshrOfTruncOfLshr.
struct LshrOfTruncOfLshr {
// True when the apply step must AND the combined wide shift with a low-bits
// mask before truncating.
bool Mask = false;
// Number of low bits the mask keeps (narrow scalar width minus the outer shift
// amount). Only written when Mask is true.
APInt MaskVal;
// Value operand of the inner shift, i.e. the `x` being shifted.
Register Src;
// Sum of the inner and outer constant shift amounts.
APInt ShiftAmt;
// Type of the inner shift's amount operand; used for the new amount constant.
LLT ShiftAmtTy;
// Result type of the inner shift; the new shift (and mask) are built in it.
LLT InnerShiftTy;
};

using BuildFnTy = std::function<void(MachineIRBuilder &)>;

using OperandBuildSteps =
Expand Down Expand Up @@ -338,6 +347,12 @@ class CombinerHelper {

bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const;

/// Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (lshr x, (C1 + C2)).
/// When the inner shift does not already discard every bit above the
/// truncated width, the combined shift is additionally ANDed with a low-bits
/// mask before the truncate.
/// \p MI is the outer G_LSHR and \p ShiftMI is the inner shift instruction;
/// \p MatchInfo receives the data needed to build the replacement.
bool matchLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo,
MachineInstr &ShiftMI) const;
/// Build the replacement described by \p MatchInfo and erase \p MI.
void applyLshrOfTruncOfLshr(MachineInstr &MI,
LshrOfTruncOfLshr &MatchInfo) const;

/// Transform a multiply by a power-of-2 value to a left shift.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const;
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const;
Expand Down
13 changes: 12 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,16 @@ def commute_shift : GICombineRule<
[{ return Helper.matchCommuteShift(*${d}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${d}, ${matchinfo}); }])>;

// Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (lshr x, (C1 + C2))
// $Shift names the inner G_LSHR and $root the outer one; both instructions are
// passed to the C++ matcher, which checks the shift amounts and fills in
// $matchinfo for the apply step.
def lshr_of_trunc_of_lshr_matchdata : GIDefMatchData<"LshrOfTruncOfLshr">;
def lshr_of_trunc_of_lshr : GICombineRule<
(defs root:$root, lshr_of_trunc_of_lshr_matchdata:$matchinfo),
(match (G_LSHR $d1, $x, $y):$Shift,
(G_TRUNC $d2, $d1),
(G_LSHR $dst, $d2, $z):$root,
[{ return Helper.matchLshrOfTruncOfLshr(*${root}, ${matchinfo}, *${Shift}); }]),
(apply [{ Helper.applyLshrOfTruncOfLshr(*${root}, ${matchinfo}); }])>;

def narrow_binop_feeding_and : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_AND):$root,
Expand Down Expand Up @@ -2133,7 +2143,8 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
simplify_neg_minmax, combine_concat_vector,
sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
combine_use_vector_truncate, merge_combines, overflow_combines, truncsat_combines]>;
combine_use_vector_truncate, merge_combines, overflow_combines,
truncsat_combines, lshr_of_trunc_of_lshr]>;

// A combine group used for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
Expand Down
62 changes: 62 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2094,6 +2094,68 @@ bool CombinerHelper::matchCommuteShift(MachineInstr &MI,
return true;
}

/// Match (lshr (trunc (lshr x, C1)), C2) so that it can be rewritten as
/// trunc (lshr x, (C1 + C2)), optionally followed by a low-bits mask.
///
/// \param MI        The outer G_LSHR (operates on the truncated value).
/// \param MatchInfo Receives the source register, combined shift amount, the
///                  types to build with and, if needed, the mask width.
/// \param ShiftMI   The inner shift whose result feeds the truncate.
/// \returns true if the fold is legal and \p MatchInfo has been filled in.
bool CombinerHelper::matchLshrOfTruncOfLshr(MachineInstr &MI,
                                            LshrOfTruncOfLshr &MatchInfo,
                                            MachineInstr &ShiftMI) const {
  assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");

  Register N0 = MI.getOperand(1).getReg();
  Register N1 = MI.getOperand(2).getReg();
  unsigned OpSizeInBits = MRI.getType(N0).getScalarSizeInBits();

  // Both shift amounts must be constants (or constant splats).
  APInt N1C, N001C;
  if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)))
    return false;
  auto N001 = ShiftMI.getOperand(2).getReg();
  if (!mi_match(N001, MRI, m_ICstOrSplat(N001C)))
    return false;

  // Bring both amounts to a common width so they can be added and compared.
  if (N001C.getBitWidth() > N1C.getBitWidth())
    N1C = N1C.zext(N001C.getBitWidth());
  else
    N001C = N001C.zext(N1C.getBitWidth());

  // An outer shift by at least the narrow width is out of range. Folding it
  // would make the mask width (OpSizeInBits - N1C) wrap around and trip the
  // APInt::getLowBitsSet assertion in the apply step, so leave it alone.
  if (N1C.uge(OpSizeInBits))
    return false;

  Register InnerShift = ShiftMI.getOperand(0).getReg();
  LLT InnerShiftTy = MRI.getType(InnerShift);
  uint64_t InnerShiftSize = InnerShiftTy.getScalarSizeInBits();

  // The combined amount must be a valid shift of the wide type. Detect
  // wrap-around explicitly so a wrapped sum cannot pass the range check.
  bool Overflow = false;
  APInt ShiftSum = N1C.uadd_ov(N001C, Overflow);
  if (Overflow || !ShiftSum.ult(InnerShiftSize))
    return false;

  // The new amount constant is built with the inner shift's amount type, so
  // it has to be representable in (and resized to) that type's scalar width;
  // buildConstant asserts on a width mismatch.
  LLT ShiftAmtTy = MRI.getType(N001);
  unsigned ShiftAmtBits = ShiftAmtTy.getScalarSizeInBits();
  if (ShiftSum.getActiveBits() > ShiftAmtBits)
    return false;

  MatchInfo.Src = ShiftMI.getOperand(1).getReg();
  MatchInfo.ShiftAmt = ShiftSum.zextOrTrunc(ShiftAmtBits);
  MatchInfo.ShiftAmtTy = ShiftAmtTy;
  MatchInfo.InnerShiftTy = InnerShiftTy;
  MatchInfo.Mask = false;

  // If the inner shift already discards everything above the truncated width,
  // the combined shift yields the right bits without any masking.
  if ((N001C + OpSizeInBits) == InnerShiftSize)
    return true;

  // Otherwise the bits above the narrow width have to be cleared with an AND.
  // That adds an instruction, so only do it when both intermediate values die.
  if (MRI.hasOneUse(N0) && MRI.hasOneUse(InnerShift)) {
    MatchInfo.Mask = true;
    MatchInfo.MaskVal = APInt(N1C.getBitWidth(), OpSizeInBits) - N1C;
    return true;
  }
  return false;
}

/// Replace the outer G_LSHR \p MI with a truncate of a single wide shift,
/// using the data recorded in \p MatchInfo. If MatchInfo.Mask is set, the wide
/// shift is ANDed with a mask of MatchInfo.MaskVal low bits before truncating.
void CombinerHelper::applyLshrOfTruncOfLshr(
    MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");

  Register Dst = MI.getOperand(0).getReg();

  // Single shift of the original source by the combined amount.
  auto CombinedAmt =
      Builder.buildConstant(MatchInfo.ShiftAmtTy, MatchInfo.ShiftAmt);
  auto WideShift =
      Builder.buildLShr(MatchInfo.InnerShiftTy, MatchInfo.Src, CombinedAmt);

  // No masking requested: truncate the wide shift directly.
  if (!MatchInfo.Mask) {
    Builder.buildTrunc(Dst, WideShift);
    MI.eraseFromParent();
    return;
  }

  // Keep only the requested number of low bits, then truncate.
  unsigned WideBits = MatchInfo.InnerShiftTy.getScalarSizeInBits();
  APInt LowBits =
      APInt::getLowBitsSet(WideBits, MatchInfo.MaskVal.getZExtValue());
  auto MaskCst = Builder.buildConstant(MatchInfo.InnerShiftTy, LowBits);
  auto Masked = Builder.buildAnd(MatchInfo.InnerShiftTy, WideShift, MaskCst);
  Builder.buildTrunc(Dst, Masked);
  MI.eraseFromParent();
}

bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
unsigned &ShiftVal) const {
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -369,5 +369,5 @@ def AArch64PostLegalizerCombiner
commute_constant_to_rhs, extract_vec_elt_combines,
push_freeze_to_prevent_poison_from_propagating,
combine_mul_cmlt, combine_use_vector_truncate,
extmultomull, truncsat_combines]> {
extmultomull, truncsat_combines, lshr_of_trunc_of_lshr]> {
}
125 changes: 125 additions & 0 deletions llvm/test/CodeGen/AArch64/lshr-trunc-lshr.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-none-eabi -global-isel=0 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-none-eabi -global-isel=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; Inner shift (32) plus the truncated width (32) equals the source width (64),
; so the two shifts are expected to become a single shift by 48 with no mask.
define i32 @s32_test1(i64 %a) {
; CHECK-LABEL: s32_test1:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x0, x0, #48
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%r = lshr i64 %a, 32
%ret = trunc i64 %r to i32
%x = lshr i32 %ret, 16
ret i32 %x
}

; Inner shift (16) plus the truncated width (32) is less than the source width
; (64), so the combined shift by 32 must keep only the low 16 bits.
define i32 @s32_test2(i64 %a) {
; CHECK-LABEL: s32_test2:
; CHECK: // %bb.0:
; CHECK-NEXT: ubfx x0, x0, #32, #16
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%r = lshr i64 %a, 16
%ret = trunc i64 %r to i32
%x = lshr i32 %ret, 16
ret i32 %x
}

; Vector splat case without a mask: inner shift (8) plus the truncated element
; width (8) equals the source element width (16); expect one shift by 12.
define <8 x i8> @v8s8_test1(<8 x i16> %a) {
; CHECK-LABEL: v8s8_test1:
; CHECK: // %bb.0:
; CHECK-NEXT: ushr v0.8h, v0.8h, #12
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%r = lshr <8 x i16> %a, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%ret = trunc <8 x i16> %r to <8 x i8>
%x = lshr <8 x i8> %ret, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
ret <8 x i8> %x
}

; Vector splat case with a mask: inner shift (4) plus the truncated element
; width (8) is less than 16, so the GlobalISel output shifts by 8 and keeps the
; low 4 bits of each element.
define <8 x i8> @v8s8_test2(<8 x i16> %a) {
; CHECK-SD-LABEL: v8s8_test2:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ushr v0.8h, v0.8h, #8
; CHECK-SD-NEXT: bic v0.8h, #240
; CHECK-SD-NEXT: xtn v0.8b, v0.8h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v8s8_test2:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v1.8h, #15
; CHECK-GI-NEXT: ushr v0.8h, v0.8h, #8
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: xtn v0.8b, v0.8h
; CHECK-GI-NEXT: ret
%r = lshr <8 x i16> %a, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
%ret = trunc <8 x i16> %r to <8 x i8>
%x = lshr <8 x i8> %ret, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
ret <8 x i8> %x
}

; No-mask case for <4 x i32> -> <4 x i16>: inner shift (16) plus the truncated
; element width (16) equals 32; expect one shift by 24.
define <4 x i16> @v4s16_test1(<4 x i32> %a) {
; CHECK-LABEL: v4s16_test1:
; CHECK: // %bb.0:
; CHECK-NEXT: ushr v0.4s, v0.4s, #24
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%r = lshr <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
%ret = trunc <4 x i32> %r to <4 x i16>
%x = lshr <4 x i16> %ret, <i16 8, i16 8, i16 8, i16 8>
ret <4 x i16> %x
}

; Mask case for <4 x i32> -> <4 x i16>: inner shift (8) plus the truncated
; element width (16) is less than 32, so the GlobalISel output shifts by 16 and
; keeps the low 8 bits of each element.
define <4 x i16> @v4s16_test2(<4 x i32> %a) {
; CHECK-SD-LABEL: v4s16_test2:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shrn v0.4h, v0.4s, #16
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v4s16_test2:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v1.2d, #0x0000ff000000ff
; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #16
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NEXT: ret
%r = lshr <4 x i32> %a, <i32 8, i32 8, i32 8, i32 8>
%ret = trunc <4 x i32> %r to <4 x i16>
%x = lshr <4 x i16> %ret, <i16 8, i16 8, i16 8, i16 8>
ret <4 x i16> %x
}

; No-mask case for <2 x i64> -> <2 x i32>: inner shift (32) plus the truncated
; element width (32) equals 64; expect one shift by 48.
define <2 x i32> @v2s32_test1(<2 x i64> %a) {
; CHECK-LABEL: v2s32_test1:
; CHECK: // %bb.0:
; CHECK-NEXT: ushr v0.2d, v0.2d, #48
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: ret
%r = lshr <2 x i64> %a, <i64 32, i64 32>
%ret = trunc <2 x i64> %r to <2 x i32>
%x = lshr <2 x i32> %ret, <i32 16, i32 16>
ret <2 x i32> %x
}

; Mask case for <2 x i64> -> <2 x i32>: inner shift (16) plus the truncated
; element width (32) is less than 64, so the GlobalISel output shifts by 32 and
; keeps the low 16 bits of each element.
define <2 x i32> @v2s32_test2(<2 x i64> %a) {
; CHECK-SD-LABEL: v2s32_test2:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-SD-NEXT: shrn v0.2s, v0.2d, #32
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v2s32_test2:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v1.2d, #0x0000000000ffff
; CHECK-GI-NEXT: ushr v0.2d, v0.2d, #32
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
; CHECK-GI-NEXT: ret
%r = lshr <2 x i64> %a, <i64 16, i64 16>
%ret = trunc <2 x i64> %r to <2 x i32>
%x = lshr <2 x i32> %ret, <i32 16, i32 16>
ret <2 x i32> %x
}
65 changes: 32 additions & 33 deletions llvm/test/CodeGen/AArch64/rem-by-const.ll
Original file line number Diff line number Diff line change
Expand Up @@ -336,26 +336,15 @@ entry:
}

define i32 @ui32_100(i32 %a, i32 %b) {
; CHECK-SD-LABEL: ui32_100:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
; CHECK-SD-NEXT: mov w9, #100 // =0x64
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
; CHECK-SD-NEXT: umull x8, w0, w8
; CHECK-SD-NEXT: lsr x8, x8, #37
; CHECK-SD-NEXT: msub w0, w8, w9, w0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ui32_100:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, #34079 // =0x851f
; CHECK-GI-NEXT: mov w9, #100 // =0x64
; CHECK-GI-NEXT: movk w8, #20971, lsl #16
; CHECK-GI-NEXT: umull x8, w0, w8
; CHECK-GI-NEXT: lsr x8, x8, #32
; CHECK-GI-NEXT: lsr w8, w8, #5
; CHECK-GI-NEXT: msub w0, w8, w9, w0
; CHECK-GI-NEXT: ret
; CHECK-LABEL: ui32_100:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #34079 // =0x851f
; CHECK-NEXT: mov w9, #100 // =0x64
; CHECK-NEXT: movk w8, #20971, lsl #16
; CHECK-NEXT: umull x8, w0, w8
; CHECK-NEXT: lsr x8, x8, #37
; CHECK-NEXT: msub w0, w8, w9, w0
; CHECK-NEXT: ret
entry:
%s = urem i32 %a, 100
ret i32 %s
Expand Down Expand Up @@ -1619,15 +1608,25 @@ entry:
}

define <8 x i8> @uv8i8_100(<8 x i8> %d, <8 x i8> %e) {
; CHECK-LABEL: uv8i8_100:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v1.8b, #41
; CHECK-NEXT: movi v2.8b, #100
; CHECK-NEXT: umull v1.8h, v0.8b, v1.8b
; CHECK-NEXT: shrn v1.8b, v1.8h, #8
; CHECK-NEXT: ushr v1.8b, v1.8b, #4
; CHECK-NEXT: mls v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
; CHECK-SD-LABEL: uv8i8_100:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi v1.8b, #41
; CHECK-SD-NEXT: movi v2.8b, #100
; CHECK-SD-NEXT: umull v1.8h, v0.8b, v1.8b
; CHECK-SD-NEXT: shrn v1.8b, v1.8h, #8
; CHECK-SD-NEXT: ushr v1.8b, v1.8b, #4
; CHECK-SD-NEXT: mls v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv8i8_100:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v1.8b, #41
; CHECK-GI-NEXT: movi v2.8b, #100
; CHECK-GI-NEXT: umull v1.8h, v0.8b, v1.8b
; CHECK-GI-NEXT: ushr v1.8h, v1.8h, #12
; CHECK-GI-NEXT: xtn v1.8b, v1.8h
; CHECK-GI-NEXT: mls v0.8b, v1.8b, v2.8b
; CHECK-GI-NEXT: ret
entry:
%s = urem <8 x i8> %d, <i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100>
ret <8 x i8> %s
Expand Down Expand Up @@ -2301,8 +2300,8 @@ define <4 x i16> @uv4i16_100(<4 x i16> %d, <4 x i16> %e) {
; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI53_0]
; CHECK-GI-NEXT: umull v1.4s, v1.4h, v2.4h
; CHECK-GI-NEXT: movi v2.4h, #100
; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-GI-NEXT: ushr v1.4h, v1.4h, #1
; CHECK-GI-NEXT: ushr v1.4s, v1.4s, #17
; CHECK-GI-NEXT: xtn v1.4h, v1.4s
; CHECK-GI-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-GI-NEXT: ret
entry:
Expand Down Expand Up @@ -2656,8 +2655,8 @@ define <2 x i32> @uv2i32_100(<2 x i32> %d, <2 x i32> %e) {
; CHECK-GI-NEXT: movi v2.2s, #100
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI63_0]
; CHECK-GI-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
; CHECK-GI-NEXT: ushr v1.2s, v1.2s, #5
; CHECK-GI-NEXT: ushr v1.2d, v1.2d, #37
; CHECK-GI-NEXT: xtn v1.2s, v1.2d
; CHECK-GI-NEXT: mls v0.2s, v1.2s, v2.2s
; CHECK-GI-NEXT: ret
entry:
Expand Down
29 changes: 9 additions & 20 deletions llvm/test/CodeGen/AArch64/urem-lkk.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,26 +20,15 @@ define i32 @fold_urem_positive_odd(i32 %x) {
}

define i32 @fold_urem_positive_even(i32 %x) {
; CHECK-SD-LABEL: fold_urem_positive_even:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w8, #16323 // =0x3fc3
; CHECK-SD-NEXT: mov w9, #1060 // =0x424
; CHECK-SD-NEXT: movk w8, #63310, lsl #16
; CHECK-SD-NEXT: umull x8, w0, w8
; CHECK-SD-NEXT: lsr x8, x8, #42
; CHECK-SD-NEXT: msub w0, w8, w9, w0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fold_urem_positive_even:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #16323 // =0x3fc3
; CHECK-GI-NEXT: mov w9, #1060 // =0x424
; CHECK-GI-NEXT: movk w8, #63310, lsl #16
; CHECK-GI-NEXT: umull x8, w0, w8
; CHECK-GI-NEXT: lsr x8, x8, #32
; CHECK-GI-NEXT: lsr w8, w8, #10
; CHECK-GI-NEXT: msub w0, w8, w9, w0
; CHECK-GI-NEXT: ret
; CHECK-LABEL: fold_urem_positive_even:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #16323 // =0x3fc3
; CHECK-NEXT: mov w9, #1060 // =0x424
; CHECK-NEXT: movk w8, #63310, lsl #16
; CHECK-NEXT: umull x8, w0, w8
; CHECK-NEXT: lsr x8, x8, #42
; CHECK-NEXT: msub w0, w8, w9, w0
; CHECK-NEXT: ret
%1 = urem i32 %x, 1060
ret i32 %1
}
Expand Down
Loading