Skip to content

Commit 5a22656

Browse files
committed
Reapply [InstSimplify] Remove known bits constant folding
No changes relative to last time, but after a mitigation for an AMDGPU regression landed.

---

If SimplifyInstruction() does not succeed in simplifying the instruction, it will compute the known bits of the instruction in the hope that all bits are known and the instruction can be folded to a constant. I have removed a similar optimization from InstCombine in D75801, and would like to drop this one as well.

On average, we spend ~1% of total compile-time performing this known bits calculation. However, if we introduce some additional statistics for known bits computations and how many of them succeed in simplifying the instruction, we get (on test-suite):

    instsimplify.NumKnownBits: 216
    instsimplify.NumKnownBitsComputed: 13828375
    valuetracking.NumKnownBitsComputed: 45860806

Out of ~14M known bits calculations (accounting for approximately one third of all known bits calculations), only 0.0015% succeed in producing a constant. Those cases where we do succeed in computing all known bits will get folded by other passes like InstCombine later.

On test-suite, only lencod.test and GCC-C-execute-pr44858.test show a hash difference after this change. On lencod we see an improvement (a loop phi is optimized away), on the GCC torture test a regression (a function return value is determined only after IPSCCP, preventing propagation from a noinline function).

There are various regressions in InstSimplify tests. However, all of these cases are already handled by InstCombine, and corresponding tests have already been added there.

Differential Revision: https://reviews.llvm.org/D79294
1 parent 989ae9e commit 5a22656

File tree

7 files changed

+63
-123
lines changed

7 files changed

+63
-123
lines changed

llvm/lib/Analysis/InstructionSimplify.cpp

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5600,9 +5600,6 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
56005600
break;
56015601
case Instruction::Call: {
56025602
Result = SimplifyCall(cast<CallInst>(I), Q);
5603-
// Don't perform known bits simplification below for musttail calls.
5604-
if (cast<CallInst>(I)->isMustTailCall())
5605-
return Result;
56065603
break;
56075604
}
56085605
case Instruction::Freeze:
@@ -5620,14 +5617,6 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
56205617
break;
56215618
}
56225619

5623-
// In general, it is possible for computeKnownBits to determine all bits in a
5624-
// value even when the operands are not all constants.
5625-
if (!Result && I->getType()->isIntOrIntVectorTy()) {
5626-
KnownBits Known = computeKnownBits(I, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE);
5627-
if (Known.isConstant())
5628-
Result = ConstantInt::get(I->getType(), Known.getConstant());
5629-
}
5630-
56315620
/// If called on unreachable code, the above logic may report that the
56325621
/// instruction simplified to itself. Make life easier for users by
56335622
/// detecting that case here, returning a safe value instead.

llvm/test/Analysis/ValueTracking/knownzero-shift.ll

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,15 @@ define i1 @test(i8 %p, i8* %pq) {
1515

1616
!0 = !{ i8 1, i8 5 }
1717

18+
; The following cases only get folded by InstCombine,
19+
; see InstCombine/shift-shift.ll. If we wanted to,
20+
; we could explicitly handle them in InstSimplify as well.
21+
1822
define i32 @shl_shl(i32 %A) {
1923
; CHECK-LABEL: @shl_shl(
20-
; CHECK-NEXT: ret i32 0
24+
; CHECK-NEXT: [[B:%.*]] = shl i32 [[A:%.*]], 6
25+
; CHECK-NEXT: [[C:%.*]] = shl i32 [[B]], 28
26+
; CHECK-NEXT: ret i32 [[C]]
2127
;
2228
%B = shl i32 %A, 6
2329
%C = shl i32 %B, 28
@@ -26,7 +32,9 @@ define i32 @shl_shl(i32 %A) {
2632

2733
define <2 x i33> @shl_shl_splat_vec(<2 x i33> %A) {
2834
; CHECK-LABEL: @shl_shl_splat_vec(
29-
; CHECK-NEXT: ret <2 x i33> zeroinitializer
35+
; CHECK-NEXT: [[B:%.*]] = shl <2 x i33> [[A:%.*]], <i33 5, i33 5>
36+
; CHECK-NEXT: [[C:%.*]] = shl <2 x i33> [[B]], <i33 28, i33 28>
37+
; CHECK-NEXT: ret <2 x i33> [[C]]
3038
;
3139
%B = shl <2 x i33> %A, <i33 5, i33 5>
3240
%C = shl <2 x i33> %B, <i33 28, i33 28>
@@ -37,7 +45,7 @@ define <2 x i33> @shl_shl_splat_vec(<2 x i33> %A) {
3745

3846
define <2 x i33> @shl_shl_vec(<2 x i33> %A) {
3947
; CHECK-LABEL: @shl_shl_vec(
40-
; CHECK-NEXT: [[B:%.*]] = shl <2 x i33> %A, <i33 6, i33 5>
48+
; CHECK-NEXT: [[B:%.*]] = shl <2 x i33> [[A:%.*]], <i33 6, i33 5>
4149
; CHECK-NEXT: [[C:%.*]] = shl <2 x i33> [[B]], <i33 27, i33 28>
4250
; CHECK-NEXT: ret <2 x i33> [[C]]
4351
;
@@ -48,7 +56,9 @@ define <2 x i33> @shl_shl_vec(<2 x i33> %A) {
4856

4957
define i232 @lshr_lshr(i232 %A) {
5058
; CHECK-LABEL: @lshr_lshr(
51-
; CHECK-NEXT: ret i232 0
59+
; CHECK-NEXT: [[B:%.*]] = lshr i232 [[A:%.*]], 231
60+
; CHECK-NEXT: [[C:%.*]] = lshr i232 [[B]], 1
61+
; CHECK-NEXT: ret i232 [[C]]
5262
;
5363
%B = lshr i232 %A, 231
5464
%C = lshr i232 %B, 1
@@ -57,7 +67,9 @@ define i232 @lshr_lshr(i232 %A) {
5767

5868
define <2 x i32> @lshr_lshr_splat_vec(<2 x i32> %A) {
5969
; CHECK-LABEL: @lshr_lshr_splat_vec(
60-
; CHECK-NEXT: ret <2 x i32> zeroinitializer
70+
; CHECK-NEXT: [[B:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 28, i32 28>
71+
; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], <i32 4, i32 4>
72+
; CHECK-NEXT: ret <2 x i32> [[C]]
6173
;
6274
%B = lshr <2 x i32> %A, <i32 28, i32 28>
6375
%C = lshr <2 x i32> %B, <i32 4, i32 4>
@@ -66,7 +78,9 @@ define <2 x i32> @lshr_lshr_splat_vec(<2 x i32> %A) {
6678

6779
define <2 x i32> @lshr_lshr_vec(<2 x i32> %A) {
6880
; CHECK-LABEL: @lshr_lshr_vec(
69-
; CHECK-NEXT: ret <2 x i32> zeroinitializer
81+
; CHECK-NEXT: [[B:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 29, i32 28>
82+
; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], <i32 4, i32 5>
83+
; CHECK-NEXT: ret <2 x i32> [[C]]
7084
;
7185
%B = lshr <2 x i32> %A, <i32 29, i32 28>
7286
%C = lshr <2 x i32> %B, <i32 4, i32 5>

llvm/test/Transforms/GVN/PRE/volatile.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -197,14 +197,17 @@ exit:
197197
ret i32 %add
198198
}
199199

200+
; This test checks that we don't optimize away instructions that are
201+
; simplified by SimplifyInstruction(), but are not trivially dead.
202+
200203
define i32 @test9(i32* %V) {
201204
; CHECK-LABEL: @test9(
202205
; CHECK-NEXT: entry:
203-
; CHECK-NEXT: [[LOAD:%.*]] = load volatile i32, i32* [[V:%.*]], !range !0
204-
; CHECK-NEXT: ret i32 0
206+
; CHECK-NEXT: [[LOAD:%.*]] = call i32 undef()
207+
; CHECK-NEXT: ret i32 undef
205208
;
206209
entry:
207-
%load = load volatile i32, i32* %V, !range !0
210+
%load = call i32 undef()
208211
ret i32 %load
209212
}
210213

llvm/test/Transforms/InstSimplify/assume.ll

Lines changed: 0 additions & 93 deletions
This file was deleted.

llvm/test/Transforms/InstSimplify/call.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -988,7 +988,7 @@ declare i8* @passthru_p8(i8* returned)
988988
define i32 @returned_const_int_arg() {
989989
; CHECK-LABEL: @returned_const_int_arg(
990990
; CHECK-NEXT: [[X:%.*]] = call i32 @passthru_i32(i32 42)
991-
; CHECK-NEXT: ret i32 42
991+
; CHECK-NEXT: ret i32 [[X]]
992992
;
993993
%x = call i32 @passthru_i32(i32 42)
994994
ret i32 %x

llvm/test/Transforms/InstSimplify/or.ll

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,17 @@ define i8 @test10(i8 %A) {
9898
ret i8 %D
9999
}
100100

101+
; The following two cases only get folded by InstCombine,
102+
; see InstCombine/or-xor.ll.
103+
101104
; (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
102105
define i8 @test11(i8 %A) {
103106
; CHECK-LABEL: @test11(
104-
; CHECK-NEXT: ret i8 -1
107+
; CHECK-NEXT: [[B:%.*]] = or i8 [[A:%.*]], -2
108+
; CHECK-NEXT: [[C:%.*]] = xor i8 [[B]], 13
109+
; CHECK-NEXT: [[D:%.*]] = or i8 [[C]], 1
110+
; CHECK-NEXT: [[E:%.*]] = xor i8 [[D]], 12
111+
; CHECK-NEXT: ret i8 [[E]]
105112
;
106113
%B = or i8 %A, -2
107114
%C = xor i8 %B, 13
@@ -112,7 +119,12 @@ define i8 @test11(i8 %A) {
112119

113120
define i8 @test11v(<2 x i8> %A) {
114121
; CHECK-LABEL: @test11v(
115-
; CHECK-NEXT: ret i8 -1
122+
; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], <i8 -2, i8 0>
123+
; CHECK-NEXT: [[CV:%.*]] = xor <2 x i8> [[B]], <i8 13, i8 13>
124+
; CHECK-NEXT: [[C:%.*]] = extractelement <2 x i8> [[CV]], i32 0
125+
; CHECK-NEXT: [[D:%.*]] = or i8 [[C]], 1
126+
; CHECK-NEXT: [[E:%.*]] = xor i8 [[D]], 12
127+
; CHECK-NEXT: ret i8 [[E]]
116128
;
117129
%B = or <2 x i8> %A, <i8 -2, i8 0>
118130
%CV = xor <2 x i8> %B, <i8 13, i8 13>

llvm/test/Transforms/InstSimplify/shift-knownbits.ll

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,8 @@ define i1 @shl_i1(i1 %a, i1 %b) {
145145
ret i1 %shl
146146
}
147147

148-
; Simplify count leading/trailing zeros to zero if all valid bits are shifted out.
148+
; The following cases only get folded by InstCombine,
149+
; see InstCombine/lshr.ll.
149150

150151
declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
151152
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
@@ -154,7 +155,9 @@ declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1) nounwind readnone
154155

155156
define i32 @lshr_ctlz_zero_is_undef(i32 %x) {
156157
; CHECK-LABEL: @lshr_ctlz_zero_is_undef(
157-
; CHECK-NEXT: ret i32 0
158+
; CHECK-NEXT: [[CT:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true)
159+
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[CT]], 5
160+
; CHECK-NEXT: ret i32 [[SH]]
158161
;
159162
%ct = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
160163
%sh = lshr i32 %ct, 5
@@ -163,7 +166,9 @@ define i32 @lshr_ctlz_zero_is_undef(i32 %x) {
163166

164167
define i32 @lshr_cttz_zero_is_undef(i32 %x) {
165168
; CHECK-LABEL: @lshr_cttz_zero_is_undef(
166-
; CHECK-NEXT: ret i32 0
169+
; CHECK-NEXT: [[CT:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
170+
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[CT]], 5
171+
; CHECK-NEXT: ret i32 [[SH]]
167172
;
168173
%ct = call i32 @llvm.cttz.i32(i32 %x, i1 true)
169174
%sh = lshr i32 %ct, 5
@@ -172,7 +177,9 @@ define i32 @lshr_cttz_zero_is_undef(i32 %x) {
172177

173178
define <2 x i8> @lshr_ctlz_zero_is_undef_splat_vec(<2 x i8> %x) {
174179
; CHECK-LABEL: @lshr_ctlz_zero_is_undef_splat_vec(
175-
; CHECK-NEXT: ret <2 x i8> zeroinitializer
180+
; CHECK-NEXT: [[CT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true)
181+
; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 3>
182+
; CHECK-NEXT: ret <2 x i8> [[SH]]
176183
;
177184
%ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
178185
%sh = lshr <2 x i8> %ct, <i8 3, i8 3>
@@ -181,7 +188,10 @@ define <2 x i8> @lshr_ctlz_zero_is_undef_splat_vec(<2 x i8> %x) {
181188

182189
define i8 @lshr_ctlz_zero_is_undef_vec(<2 x i8> %x) {
183190
; CHECK-LABEL: @lshr_ctlz_zero_is_undef_vec(
184-
; CHECK-NEXT: ret i8 0
191+
; CHECK-NEXT: [[CT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true)
192+
; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 0>
193+
; CHECK-NEXT: [[EX:%.*]] = extractelement <2 x i8> [[SH]], i32 0
194+
; CHECK-NEXT: ret i8 [[EX]]
185195
;
186196
%ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
187197
%sh = lshr <2 x i8> %ct, <i8 3, i8 0>
@@ -191,7 +201,9 @@ define i8 @lshr_ctlz_zero_is_undef_vec(<2 x i8> %x) {
191201

192202
define <2 x i8> @lshr_cttz_zero_is_undef_splat_vec(<2 x i8> %x) {
193203
; CHECK-LABEL: @lshr_cttz_zero_is_undef_splat_vec(
194-
; CHECK-NEXT: ret <2 x i8> zeroinitializer
204+
; CHECK-NEXT: [[CT:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 true)
205+
; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 3>
206+
; CHECK-NEXT: ret <2 x i8> [[SH]]
195207
;
196208
%ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
197209
%sh = lshr <2 x i8> %ct, <i8 3, i8 3>
@@ -200,7 +212,10 @@ define <2 x i8> @lshr_cttz_zero_is_undef_splat_vec(<2 x i8> %x) {
200212

201213
define i8 @lshr_cttz_zero_is_undef_vec(<2 x i8> %x) {
202214
; CHECK-LABEL: @lshr_cttz_zero_is_undef_vec(
203-
; CHECK-NEXT: ret i8 0
215+
; CHECK-NEXT: [[CT:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 true)
216+
; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 0>
217+
; CHECK-NEXT: [[EX:%.*]] = extractelement <2 x i8> [[SH]], i32 0
218+
; CHECK-NEXT: ret i8 [[EX]]
204219
;
205220
%ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
206221
%sh = lshr <2 x i8> %ct, <i8 3, i8 0>

0 commit comments

Comments
 (0)