Skip to content

Commit b50ad94

Browse files
authored
[InstSimplify] Simplify extractvalue (umul_with_overflow(x, 1)). (#157307)
Look through extractvalue to simplify umul_with_overflow where one of the operands is 1. This removes some redundant instructions when expanding SCEVs, which in turn makes the runtime check cost estimate more accurate and lowers the minimum number of iterations for which vectorization is considered profitable. PR: #157307
1 parent e9499e8 commit b50ad94

File tree

8 files changed

+98
-58
lines changed

8 files changed

+98
-58
lines changed

llvm/lib/Analysis/InstructionSimplify.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5242,6 +5242,19 @@ static Value *simplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
52425242
}
52435243
}
52445244

5245+
// Simplify umul_with_overflow where one operand is 1.
5246+
Value *V;
5247+
if (Idxs.size() == 1 &&
5248+
(match(Agg,
5249+
m_Intrinsic<Intrinsic::umul_with_overflow>(m_Value(V), m_One())) ||
5250+
match(Agg, m_Intrinsic<Intrinsic::umul_with_overflow>(m_One(),
5251+
m_Value(V))))) {
5252+
if (Idxs[0] == 0)
5253+
return V;
5254+
assert(Idxs[0] == 1 && "invalid index");
5255+
return getFalse(CmpInst::makeCmpResultType(V->getType()));
5256+
}
5257+
52455258
return nullptr;
52465259
}
52475260

llvm/test/Transforms/InstSimplify/fold-intrinsics.ll

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,3 +610,63 @@ define void @umul_fix_sat_poison(ptr %P) {
610610

611611
ret void
612612
}
613+
614+
declare void @use.i32(i32, i1)
615+
616+
define void @umul_extractvalue(ptr %P, i32 %x) {
617+
; CHECK-LABEL: @umul_extractvalue(
618+
; CHECK-NEXT: call void @use.i32(i32 [[X:%.*]], i1 false)
619+
; CHECK-NEXT: call void @use.i32(i32 [[X]], i1 false)
620+
; CHECK-NEXT: [[UMUL_3:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[X]])
621+
; CHECK-NEXT: [[R_3:%.*]] = extractvalue { i32, i1 } [[UMUL_3]], 0
622+
; CHECK-NEXT: [[OV_3:%.*]] = extractvalue { i32, i1 } [[UMUL_3]], 1
623+
; CHECK-NEXT: call void @use.i32(i32 [[R_3]], i1 [[OV_3]])
624+
; CHECK-NEXT: ret void
625+
;
626+
%umul.1 = call {i32, i1} @llvm.umul.with.overflow(i32 %x, i32 1)
627+
%r.1 = extractvalue {i32, i1} %umul.1, 0
628+
%ov.1 = extractvalue {i32, i1} %umul.1, 1
629+
call void @use.i32(i32 %r.1, i1 %ov.1)
630+
631+
%umul.2 = call {i32, i1} @llvm.umul.with.overflow(i32 1, i32 %x)
632+
%r.2 = extractvalue {i32, i1} %umul.2, 0
633+
%ov.2 = extractvalue {i32, i1} %umul.2, 1
634+
call void @use.i32(i32 %r.2, i1 %ov.2)
635+
636+
%umul.3 = call {i32, i1} @llvm.umul.with.overflow(i32 2, i32 %x)
637+
%r.3 = extractvalue {i32, i1} %umul.3, 0
638+
%ov.3 = extractvalue {i32, i1} %umul.3, 1
639+
call void @use.i32(i32 %r.3, i1 %ov.3)
640+
641+
ret void
642+
}
643+
644+
declare void @use.4xi32(<4 x i32>, <4 x i1>)
645+
646+
define void @umul_extractvalue_vec(ptr %P, <4 x i32> %x) {
647+
; CHECK-LABEL: @umul_extractvalue_vec(
648+
; CHECK-NEXT: call void @use.4xi32(<4 x i32> [[X:%.*]], <4 x i1> zeroinitializer)
649+
; CHECK-NEXT: call void @use.4xi32(<4 x i32> [[X]], <4 x i1> zeroinitializer)
650+
; CHECK-NEXT: [[UMUL_3:%.*]] = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> splat (i32 2), <4 x i32> [[X]])
651+
; CHECK-NEXT: [[R_3:%.*]] = extractvalue { <4 x i32>, <4 x i1> } [[UMUL_3]], 0
652+
; CHECK-NEXT: [[OV_3:%.*]] = extractvalue { <4 x i32>, <4 x i1> } [[UMUL_3]], 1
653+
; CHECK-NEXT: call void @use.4xi32(<4 x i32> [[R_3]], <4 x i1> [[OV_3]])
654+
; CHECK-NEXT: ret void
655+
;
656+
%umul.1 = call {<4 x i32>, <4 x i1>} @llvm.umul.with.overflow.v4i32(<4 x i32> %x, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
657+
%r.1 = extractvalue {<4 x i32>, <4 x i1>} %umul.1, 0
658+
%ov.1 = extractvalue {<4 x i32>, <4 x i1>} %umul.1, 1
659+
call void @use.4xi32(<4 x i32> %r.1, <4 x i1> %ov.1)
660+
661+
%umul.2 = call {<4 x i32>, <4 x i1>} @llvm.umul.with.overflow.v4i32(<4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> %x)
662+
%r.2 = extractvalue {<4 x i32>, <4 x i1>} %umul.2, 0
663+
%ov.2 = extractvalue {<4 x i32>, <4 x i1>} %umul.2, 1
664+
call void @use.4xi32(<4 x i32> %r.2, <4 x i1> %ov.2)
665+
666+
%umul.3 = call {<4 x i32>, <4 x i1>} @llvm.umul.with.overflow.v4i32(<4 x i32> <i32 2, i32 2, i32 2, i32 2>, <4 x i32> %x)
667+
%r.3 = extractvalue {<4 x i32>, <4 x i1>} %umul.3, 0
668+
%ov.3 = extractvalue {<4 x i32>, <4 x i1>} %umul.3, 1
669+
call void @use.4xi32(<4 x i32> %r.3, <4 x i1> %ov.3)
670+
671+
ret void
672+
}

llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -245,13 +245,10 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) {
245245
; VF-TWO-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
246246
; VF-TWO-CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
247247
; VF-TWO-CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]])
248-
; VF-TWO-CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
249-
; VF-TWO-CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
250-
; VF-TWO-CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
248+
; VF-TWO-CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[TMP2]]
251249
; VF-TWO-CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP0]]
252-
; VF-TWO-CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
253250
; VF-TWO-CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
254-
; VF-TWO-CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
251+
; VF-TWO-CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP4]], [[TMP6]]
255252
; VF-TWO-CHECK-NEXT: br i1 [[TMP7]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
256253
; VF-TWO-CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
257254
; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32
@@ -373,13 +370,10 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) {
373370
; VF-FOUR-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
374371
; VF-FOUR-CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
375372
; VF-FOUR-CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]])
376-
; VF-FOUR-CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
377-
; VF-FOUR-CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
378-
; VF-FOUR-CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
373+
; VF-FOUR-CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[TMP2]]
379374
; VF-FOUR-CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP0]]
380-
; VF-FOUR-CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
381375
; VF-FOUR-CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
382-
; VF-FOUR-CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
376+
; VF-FOUR-CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP4]], [[TMP6]]
383377
; VF-FOUR-CHECK-NEXT: br i1 [[TMP7]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
384378
; VF-FOUR-CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
385379
; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32

llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -194,14 +194,11 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
194194
; RV64-NEXT: [[TMP4:%.*]] = add i32 [[N]], -1
195195
; RV64-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP3]] to i32
196196
; RV64-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP5]])
197-
; RV64-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
198-
; RV64-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
199-
; RV64-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[MUL_RESULT]]
197+
; RV64-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]]
200198
; RV64-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP4]]
201-
; RV64-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]]
202199
; RV64-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP3]], 4294967295
203-
; RV64-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
204-
; RV64-NEXT: br i1 [[TMP10]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
200+
; RV64-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[TMP9]]
201+
; RV64-NEXT: br i1 [[TMP8]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
205202
; RV64: [[VECTOR_MEMCHECK]]:
206203
; RV64-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
207204
; RV64-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
@@ -334,13 +331,10 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
334331
; RV64-UF2-NEXT: [[TMP4:%.*]] = add i32 [[N]], -1
335332
; RV64-UF2-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP3]] to i32
336333
; RV64-UF2-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP5]])
337-
; RV64-UF2-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
338-
; RV64-UF2-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
339-
; RV64-UF2-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[MUL_RESULT]]
334+
; RV64-UF2-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]]
340335
; RV64-UF2-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP4]]
341-
; RV64-UF2-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]]
342336
; RV64-UF2-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP3]], 4294967295
343-
; RV64-UF2-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
337+
; RV64-UF2-NEXT: [[TMP10:%.*]] = or i1 [[TMP7]], [[TMP9]]
344338
; RV64-UF2-NEXT: br i1 [[TMP10]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
345339
; RV64-UF2: [[VECTOR_MEMCHECK]]:
346340
; RV64-UF2-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
@@ -455,14 +449,11 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
455449
; RV64-NEXT: [[TMP4:%.*]] = add i32 [[N]], -1
456450
; RV64-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP3]] to i32
457451
; RV64-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP5]])
458-
; RV64-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
459-
; RV64-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
460-
; RV64-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[MUL_RESULT]]
452+
; RV64-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]]
461453
; RV64-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP4]]
462-
; RV64-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]]
463454
; RV64-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP3]], 4294967295
464-
; RV64-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
465-
; RV64-NEXT: br i1 [[TMP10]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
455+
; RV64-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[TMP9]]
456+
; RV64-NEXT: br i1 [[TMP8]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
466457
; RV64: [[VECTOR_MEMCHECK]]:
467458
; RV64-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
468459
; RV64-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
@@ -595,13 +586,10 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
595586
; RV64-UF2-NEXT: [[TMP4:%.*]] = add i32 [[N]], -1
596587
; RV64-UF2-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP3]] to i32
597588
; RV64-UF2-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP5]])
598-
; RV64-UF2-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
599-
; RV64-UF2-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
600-
; RV64-UF2-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[MUL_RESULT]]
589+
; RV64-UF2-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]]
601590
; RV64-UF2-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP4]]
602-
; RV64-UF2-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]]
603591
; RV64-UF2-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP3]], 4294967295
604-
; RV64-UF2-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
592+
; RV64-UF2-NEXT: [[TMP10:%.*]] = or i1 [[TMP7]], [[TMP9]]
605593
; RV64-UF2-NEXT: br i1 [[TMP10]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
606594
; RV64-UF2: [[VECTOR_MEMCHECK]]:
607595
; RV64-UF2-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()

llvm/test/Transforms/LoopVectorize/X86/pr35432.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ define i32 @main(ptr %ptr) {
3838
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
3939
; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP2]])
4040
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], [[UMIN1]]
41-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP4]], 40
41+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP4]], 36
4242
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
4343
; CHECK: vector.scevcheck:
4444
; CHECK-NEXT: [[TMP5:%.*]] = add i8 [[CONV3]], -1
@@ -47,13 +47,10 @@ define i32 @main(ptr %ptr) {
4747
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP6]], [[UMIN]]
4848
; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
4949
; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP8]])
50-
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
51-
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
52-
; CHECK-NEXT: [[TMP9:%.*]] = sub i8 [[TMP5]], [[MUL_RESULT]]
50+
; CHECK-NEXT: [[TMP9:%.*]] = sub i8 [[TMP5]], [[TMP8]]
5351
; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i8 [[TMP9]], [[TMP5]]
54-
; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP10]], [[MUL_OVERFLOW]]
5552
; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i32 [[TMP7]], 255
56-
; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]]
53+
; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP10]], [[TMP12]]
5754
; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[DOTPROMOTED]], 1
5855
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP7]]
5956
; CHECK-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP15]], [[TMP14]]

llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -156,13 +156,10 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n)
156156
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
157157
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
158158
; CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]])
159-
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
160-
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
161-
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
159+
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[TMP2]]
162160
; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP0]]
163-
; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
164161
; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
165-
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
162+
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP4]], [[TMP6]]
166163
; CHECK-NEXT: br i1 [[TMP7]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
167164
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
168165
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4

llvm/test/Transforms/LoopVectorize/pr37248.ll

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,10 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) {
2727
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[START]] to i16
2828
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i16
2929
; CHECK-NEXT: [[MUL:%.*]] = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 1, i16 [[TMP4]])
30-
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i16, i1 } [[MUL]], 0
31-
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i16, i1 } [[MUL]], 1
32-
; CHECK-NEXT: [[TMP5:%.*]] = sub i16 [[TMP3]], [[MUL_RESULT]]
30+
; CHECK-NEXT: [[TMP5:%.*]] = sub i16 [[TMP3]], [[TMP4]]
3331
; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i16 [[TMP5]], [[TMP3]]
34-
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[MUL_OVERFLOW]]
3532
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i32 [[TMP2]], 65535
36-
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
33+
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP8]]
3734
; CHECK-NEXT: br i1 [[TMP9]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
3835
; CHECK: [[VECTOR_PH]]:
3936
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
@@ -109,13 +106,10 @@ define void @f2(ptr noalias %b, i1 %c, i32 %start) {
109106
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[START]] to i16
110107
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i16
111108
; CHECK-NEXT: [[MUL:%.*]] = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 1, i16 [[TMP4]])
112-
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i16, i1 } [[MUL]], 0
113-
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i16, i1 } [[MUL]], 1
114-
; CHECK-NEXT: [[TMP5:%.*]] = sub i16 [[TMP3]], [[MUL_RESULT]]
109+
; CHECK-NEXT: [[TMP5:%.*]] = sub i16 [[TMP3]], [[TMP4]]
115110
; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i16 [[TMP5]], [[TMP3]]
116-
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[MUL_OVERFLOW]]
117111
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i32 [[TMP2]], 65535
118-
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
112+
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP8]]
119113
; CHECK-NEXT: br i1 [[TMP9]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
120114
; CHECK: [[VECTOR_PH]]:
121115
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2

llvm/test/Transforms/LoopVectorize/reverse_induction.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -150,12 +150,9 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) {
150150
; CHECK: [[VECTOR_SCEVCHECK]]:
151151
; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[STARTVAL]], -1
152152
; CHECK-NEXT: [[MUL:%.*]] = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 1, i16 1023)
153-
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i16, i1 } [[MUL]], 0
154-
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i16, i1 } [[MUL]], 1
155-
; CHECK-NEXT: [[TMP1:%.*]] = sub i16 [[TMP0]], [[MUL_RESULT]]
153+
; CHECK-NEXT: [[TMP1:%.*]] = sub i16 [[TMP0]], 1023
156154
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i16 [[TMP1]], [[TMP0]]
157-
; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[TMP2]], [[MUL_OVERFLOW]]
158-
; CHECK-NEXT: br i1 [[TMP3]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
155+
; CHECK-NEXT: br i1 [[TMP2]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
159156
; CHECK: [[VECTOR_PH]]:
160157
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
161158
; CHECK: [[VECTOR_BODY]]:

0 commit comments

Comments
 (0)