diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 0ac0ca7463131..1d7a8b981b5ee 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -3490,11 +3490,9 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, } /// Get a canonical UDivExpr for a recurrence. /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0. - // We can currently only fold X%N if X is constant. - const SCEVConstant *StartC = dyn_cast(AR->getStart()); - if (StartC && !DivInt.urem(StepInt)) { - const APInt &StartInt = StartC->getAPInt(); - const APInt &StartRem = StartInt.urem(StepInt); + const APInt *StartRem; + if (!DivInt.urem(StepInt) && match(getURemExpr(AR->getStart(), Step), + m_scev_APInt(StartRem))) { bool NoWrap = getZeroExtendExpr(AR, ExtTy) == getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), @@ -3507,10 +3505,15 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, // all offsets in [[(X - X%N), X). bool CanFoldWithWrap = StepInt.ule(DivInt) && // N <= C StepInt.isPowerOf2() && DivInt.isPowerOf2(); - if (StartRem != 0 && (NoWrap || CanFoldWithWrap)) { - const SCEV *NewLHS = getAddRecExpr( - getConstant(StartInt - StartRem), Step, AR->getLoop(), - NoWrap ? SCEV::FlagNW : SCEV::FlagAnyWrap); + // Only fold if the subtraction can be folded in the start + // expression. + const SCEV *NewStart = + getMinusSCEV(AR->getStart(), getConstant(*StartRem)); + if (*StartRem != 0 && (NoWrap || CanFoldWithWrap) && + !isa(NewStart)) { + const SCEV *NewLHS = + getAddRecExpr(NewStart, Step, AR->getLoop(), + NoWrap ? SCEV::FlagNW : SCEV::FlagAnyWrap); if (LHS != NewLHS) { LHS = NewLHS; diff --git a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll index 9a9a6a7d45931..e041c96371762 100644 --- a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll +++ b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll @@ -180,7 +180,7 @@ define void @test_step2_start_outer_add_rec_step_16(i64 %n, i64 %m) { ; CHECK-NEXT: %iv.1 = add i64 %iv, 1 ; CHECK-NEXT: --> {{\{\{}}1,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4 -; CHECK-NEXT: --> ({{\{\{}}1,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: --> ({{\{\{}}0,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %iv.2 = add i64 %iv, 2 ; CHECK-NEXT: --> {{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4 @@ -188,7 +188,7 @@ define void @test_step2_start_outer_add_rec_step_16(i64 %n, i64 %m) { ; CHECK-NEXT: %iv.3 = add i64 %iv, 3 ; CHECK-NEXT: --> {{\{\{}}3,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.3 = udiv i64 %iv.3, 4 -; CHECK-NEXT: --> ({{\{\{}}3,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: --> ({{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %iv.4 = add i64 %iv, 4 ; CHECK-NEXT: --> {{\{\{}}4,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.4 = udiv i64 %iv.4, 4 @@ -196,11 +196,11 @@ define void @test_step2_start_outer_add_rec_step_16(i64 %n, i64 %m) { ; CHECK-NEXT: %iv.5 = add i64 %iv, 5 ; CHECK-NEXT: --> {{\{\{}}5,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.5 = udiv i64 %iv.5, 4 -; CHECK-NEXT: --> ({{\{\{}}5,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: --> ({{\{\{}}4,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %iv.neg.1 = add i64 %iv, -1 ; CHECK-NEXT: --> {{\{\{}}-1,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.neg.1 = udiv i64 %iv.neg.1, 4 -; CHECK-NEXT: --> ({{\{\{}}-1,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: --> ({{\{\{}}-2,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div3.0 = udiv i64 %iv, 3 ; CHECK-NEXT: --> ({{\{\{}}0,+,16}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div3.1 = udiv i64 %iv.1, 3 @@ -296,7 +296,7 @@ define void @test_step2_div4_start_outer_add_rec_step_2(i64 %n, i64 %m) { ; CHECK-NEXT: %iv.1 = add i64 %iv, 1 ; CHECK-NEXT: --> {{\{\{}}1,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4 -; CHECK-NEXT: --> ({{\{\{}}1,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: --> ({{\{\{}}0,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %iv.2 = add i64 %iv, 2 ; CHECK-NEXT: --> {{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4 @@ -304,7 +304,7 @@ define void @test_step2_div4_start_outer_add_rec_step_2(i64 %n, i64 %m) { ; CHECK-NEXT: %iv.3 = add i64 %iv, 3 ; CHECK-NEXT: --> {{\{\{}}3,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.3 = udiv i64 %iv.3, 4 -; CHECK-NEXT: --> ({{\{\{}}3,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: --> ({{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %iv.4 = add i64 %iv, 4 ; CHECK-NEXT: --> {{\{\{}}4,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.4 = udiv i64 %iv.4, 4 @@ -312,11 +312,11 @@ define void @test_step2_div4_start_outer_add_rec_step_2(i64 %n, i64 %m) { ; CHECK-NEXT: %iv.5 = add i64 %iv, 5 ; CHECK-NEXT: --> {{\{\{}}5,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.5 = udiv i64 %iv.5, 4 -; CHECK-NEXT: --> ({{\{\{}}5,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: --> ({{\{\{}}4,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %iv.neg.1 = add i64 %iv, -1 ; CHECK-NEXT: --> {{\{\{}}-1,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.neg.1 = udiv i64 %iv.neg.1, 4 -; CHECK-NEXT: --> ({{\{\{}}-1,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: --> ({{\{\{}}-2,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div3.0 = udiv i64 %iv, 3 ; CHECK-NEXT: --> ({{\{\{}}0,+,2}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div3.1 = udiv i64 %iv.1, 3 diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll index b612bfb88198e..02c0b676374f4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll @@ -48,4 +48,37 @@ exit: ret void } +define i64 @sdiv_arg_outer_iv(ptr noalias %dst, ptr %src) { +; CHECK: 'sdiv_arg_outer_iv' +; CHECK: Cost of 0 for VF 2: CLONE ir<%div> = sdiv ir<%add.offset>, ir<8> +; CHECK: Cost of 0 for VF 4: CLONE ir<%div> = sdiv ir<%add.offset>, ir<8> +; CHECK: Cost of 0 for VF 8: CLONE ir<%div> = sdiv ir<%add.offset>, ir<8> +; CHECK: Cost of 0 for VF 16: REPLICATE ir<%div> = sdiv ir<%add.offset>, ir<8> +entry: + br label %outer.header + +outer.header: + %outer.iv = phi i32 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] + %offset = shl nsw i32 %outer.iv, 7 + br label %loop + +loop: + %iv = phi i64 [ 0, %outer.header ], [ %iv.next, %loop ] + %iv.trunc = trunc i64 %iv to i32 + %add.offset = add i32 %offset, %iv.trunc + %div = sdiv i32 %add.offset, 8 + %div.ext = sext i32 %div to i64 + %gep.src = getelementptr i8, ptr %src, i64 %div.ext + %l = load i8, ptr %gep.src, align 1 + %gep.dst = getelementptr i8, ptr %dst, i64 %iv + store i8 %l, ptr %gep.dst, align 1 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 64 + br i1 %ec, label %outer.latch, label %loop + +outer.latch: + %outer.iv.next = add nsw i32 %outer.iv, 1 + br label %outer.header +} + attributes #0 = { "target-features"="+avx2" "tune-cpu"="alderlake" }