[GlobalISel] ComputeKnownBits - use common KnownBits shift handling (PR44526)

Convert GISelKnownBits.computeKnownBitsImpl shift handling to use the common KnownBits implementations, which make use of the known leading/trailing bits of shifted values even in cases where we don't know the shift amount, as detailed in https://blog.regehr.org/archives/1709

Differential Revision: https://reviews.llvm.org/D90527
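As a rough illustration (not part of this commit), here is a minimal standalone sketch, assuming a program built against LLVM's Support library, showing that KnownBits::lshr keeps the known leading zeros of the shifted value even when nothing is known about the shift amount, which is exactly the case the old handling gave up on.

#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // A 32-bit value whose top 16 bits are known to be zero.
  KnownBits LHS(32);
  LHS.Zero.setHighBits(16);

  // A completely unknown 32-bit shift amount.
  KnownBits RHS(32);

  // The previous GISelKnownBits code bailed out on a non-constant shift;
  // the common helper still proves the top 16 bits of the result are zero.
  KnownBits Res = KnownBits::lshr(LHS, RHS);
  outs() << "min leading zeros: " << Res.countMinLeadingZeros() << '\n';
  return 0;
}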
RKSimon committed Nov 5, 2020
1 parent d025df3 commit 546d002
Showing 2 changed files with 45 additions and 88 deletions.
57 changes: 22 additions & 35 deletions llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -369,44 +369,31 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits());
break;
}
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR:
case TargetOpcode::G_SHL: {
KnownBits RHSKnown;
case TargetOpcode::G_ASHR: {
KnownBits LHSKnown, RHSKnown;
computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
Depth + 1);
computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
Depth + 1);
if (!RHSKnown.isConstant()) {
LLVM_DEBUG(
MachineInstr *RHSMI = MRI.getVRegDef(MI.getOperand(2).getReg());
dbgs() << '[' << Depth << "] Shift not known constant: " << *RHSMI);
break;
}
uint64_t Shift = RHSKnown.getConstant().getZExtValue();
LLVM_DEBUG(dbgs() << '[' << Depth << "] Shift is " << Shift << '\n');

// Guard against oversized shift amounts
if (Shift >= MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits())
break;

computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
Known = KnownBits::ashr(LHSKnown, RHSKnown);
break;
}
case TargetOpcode::G_LSHR: {
KnownBits LHSKnown, RHSKnown;
computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
Depth + 1);

switch (Opcode) {
case TargetOpcode::G_ASHR:
Known.Zero = Known.Zero.ashr(Shift);
Known.One = Known.One.ashr(Shift);
break;
case TargetOpcode::G_LSHR:
Known.Zero = Known.Zero.lshr(Shift);
Known.One = Known.One.lshr(Shift);
Known.Zero.setBitsFrom(Known.Zero.getBitWidth() - Shift);
break;
case TargetOpcode::G_SHL:
Known.Zero = Known.Zero.shl(Shift);
Known.One = Known.One.shl(Shift);
Known.Zero.setBits(0, Shift);
break;
}
computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
Depth + 1);
Known = KnownBits::lshr(LHSKnown, RHSKnown);
break;
}
case TargetOpcode::G_SHL: {
KnownBits LHSKnown, RHSKnown;
computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
Depth + 1);
computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
Depth + 1);
Known = KnownBits::shl(LHSKnown, RHSKnown);
break;
}
case TargetOpcode::G_INTTOPTR:
76 changes: 23 additions & 53 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -549,8 +549,6 @@ define amdgpu_ps i32 @s_lshr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amou
; GFX6-NEXT: s_and_b32 s2, s3, s4
; GFX6-NEXT: s_and_b32 s1, s1, s4
; GFX6-NEXT: s_lshr_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s1, s1, s4
; GFX6-NEXT: s_and_b32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
@@ -592,8 +590,6 @@ define amdgpu_ps float @lshr_v2i16_sv(<2 x i16> inreg %value, <2 x i16> %amount)
; GFX6-NEXT: v_and_b32_e32 v1, s2, v1
; GFX6-NEXT: s_and_b32 s0, s1, s2
; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s2, v1
; GFX6-NEXT: v_and_b32_e32 v0, s2, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
@@ -626,8 +622,6 @@ define amdgpu_ps float @lshr_v2i16_vs(<2 x i16> %value, <2 x i16> inreg %amount)
; GFX6-NEXT: s_and_b32 s0, s1, s2
; GFX6-NEXT: v_and_b32_e32 v1, s2, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, s0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s2, v1
; GFX6-NEXT: v_and_b32_e32 v0, s2, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
@@ -674,18 +668,14 @@ define <2 x float> @v_lshr_v4i16(<4 x i16> %value, <4 x i16> %amount) {
; GFX6-NEXT: v_lshrrev_b32_e32 v1, v4, v1
; GFX6-NEXT: v_and_b32_e32 v4, s4, v6
; GFX6-NEXT: v_and_b32_e32 v2, s4, v2
; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
; GFX6-NEXT: v_and_b32_e32 v4, s4, v7
; GFX6-NEXT: v_and_b32_e32 v3, s4, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s4, v2
; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_v4i16:
@@ -722,18 +712,14 @@ define amdgpu_ps <2 x i32> @s_lshr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg
; GFX6-NEXT: s_lshr_b32 s1, s1, s4
; GFX6-NEXT: s_and_b32 s4, s6, s8
; GFX6-NEXT: s_and_b32 s2, s2, s8
; GFX6-NEXT: s_and_b32 s1, s1, s8
; GFX6-NEXT: s_lshr_b32 s2, s2, s4
; GFX6-NEXT: s_and_b32 s4, s7, s8
; GFX6-NEXT: s_and_b32 s3, s3, s8
; GFX6-NEXT: s_lshr_b32 s3, s3, s4
; GFX6-NEXT: s_and_b32 s0, s0, s8
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, s4
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, s8
; GFX6-NEXT: s_and_b32 s2, s3, s8
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_lshr_v4i16:
@@ -816,36 +802,28 @@ define <4 x float> @v_lshr_v8i16(<8 x i16> %value, <8 x i16> %amount) {
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v8, v2
; GFX6-NEXT: v_and_b32_e32 v8, s4, v11
; GFX6-NEXT: v_and_b32_e32 v3, s4, v3
; GFX6-NEXT: v_mov_b32_e32 v16, 0xffff
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v8, v3
; GFX6-NEXT: v_and_b32_e32 v8, s4, v12
; GFX6-NEXT: v_and_b32_e32 v4, s4, v4
; GFX6-NEXT: v_and_b32_e32 v1, v1, v16
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4
; GFX6-NEXT: v_and_b32_e32 v8, s4, v13
; GFX6-NEXT: v_and_b32_e32 v5, s4, v5
; GFX6-NEXT: v_mov_b32_e32 v16, 0xffff
; GFX6-NEXT: v_lshrrev_b32_e32 v5, v8, v5
; GFX6-NEXT: v_and_b32_e32 v8, s4, v14
; GFX6-NEXT: v_and_b32_e32 v6, s4, v6
; GFX6-NEXT: v_and_b32_e32 v0, v0, v16
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v6, v8, v6
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, v2, v16
; GFX6-NEXT: v_and_b32_e32 v2, v3, v16
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_and_b32_e32 v8, v15, v16
; GFX6-NEXT: v_and_b32_e32 v7, v7, v16
; GFX6-NEXT: v_and_b32_e32 v3, v5, v16
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v7, v8, v7
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_and_b32_e32 v2, v4, v16
; GFX6-NEXT: v_and_b32_e32 v4, v7, v16
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_and_b32_e32 v3, v6, v16
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v7
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v5
; GFX6-NEXT: v_or_b32_e32 v2, v4, v2
; GFX6-NEXT: v_or_b32_e32 v3, v6, v3
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_v8i16:
@@ -896,32 +874,24 @@ define amdgpu_ps <4 x i32> @s_lshr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg
; GFX6-NEXT: s_lshr_b32 s3, s3, s8
; GFX6-NEXT: s_and_b32 s8, s12, s16
; GFX6-NEXT: s_and_b32 s4, s4, s16
; GFX6-NEXT: s_and_b32 s1, s1, s16
; GFX6-NEXT: s_lshr_b32 s4, s4, s8
; GFX6-NEXT: s_and_b32 s8, s13, s16
; GFX6-NEXT: s_and_b32 s5, s5, s16
; GFX6-NEXT: s_lshr_b32 s5, s5, s8
; GFX6-NEXT: s_and_b32 s8, s14, s16
; GFX6-NEXT: s_and_b32 s6, s6, s16
; GFX6-NEXT: s_and_b32 s0, s0, s16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshr_b32 s6, s6, s8
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, s16
; GFX6-NEXT: s_and_b32 s2, s3, s16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_and_b32 s8, s15, s16
; GFX6-NEXT: s_and_b32 s7, s7, s16
; GFX6-NEXT: s_and_b32 s3, s5, s16
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_lshr_b32 s7, s7, s8
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s2, s4, s16
; GFX6-NEXT: s_and_b32 s4, s7, s16
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_and_b32 s3, s6, s16
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_or_b32 s3, s3, s4
; GFX6-NEXT: s_lshl_b32 s3, s7, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: s_lshl_b32 s2, s5, 16
; GFX6-NEXT: s_or_b32 s2, s4, s2
; GFX6-NEXT: s_or_b32 s3, s6, s3
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_lshr_v8i16:
