From 795ecb2f5ee2fcee629049dd81ab83c75a565b86 Mon Sep 17 00:00:00 2001 From: sgokhale Date: Wed, 12 Nov 2025 22:45:46 -0800 Subject: [PATCH] [LoopFusion] Fix assert failure reported in issue 80301 Concerning the test 'IR.ll' mentioned in the issue (the same has been added as a test case), we have the following scenario before fusion: GC1 --> { L1 --> L2} -- | | |<-------------------- V GC2 --> {L3 --> L4} -- | | |<-------------------- V exit The candidate set before fusion, in the same sequence, is {L1, L2, L3, L4}. L1 and L2 get fused, giving us the single loop L12 with guard condition GC1. L3 and L4 are considered not to have a guard condition. When inserting L12 back into the candidate set, there was an assert that if the entry block of L12 (i.e. GC1) dominates the entry block of {L3-->L4} (i.e. L3's preheader), then the entry block of {L3-->L4} must postdominate the entry block of L12. While this is true for fusing candidates, it is not true for inserting candidates into the candidate set (that is, for preparing the new candidate set, or setting the order of the candidates for the next fusion). The postdominance condition did not hold in the above case, hence the assert failure. This patch resolves the issue by removing the assert and using the assert's condition to decide the relative order of candidates in the set. --- llvm/lib/Transforms/Scalar/LoopFuse.cpp | 12 +- llvm/test/Transforms/LoopFusion/guarded.ll | 139 +++++++++++++++++++++ 2 files changed, 144 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp index 9ffa602416b05..e3a44a4976b7d 100644 --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -406,16 +406,14 @@ struct FusionCandidateCompare { // Do this compare first so if LHS == RHS, function returns false. 
if (DT->dominates(RHSEntryBlock, LHSEntryBlock)) { - // RHS dominates LHS - // Verify LHS post-dominates RHS - assert(LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock)); - return false; + // RHS dominates LHS. + // Check if LHS post-dominates RHS. + return !LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock); } if (DT->dominates(LHSEntryBlock, RHSEntryBlock)) { - // Verify RHS Postdominates LHS - assert(LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock)); - return true; + // Check if RHS postdominates LHS. + return LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock); } // If two FusionCandidates are in the same level of dominator tree, diff --git a/llvm/test/Transforms/LoopFusion/guarded.ll b/llvm/test/Transforms/LoopFusion/guarded.ll index 863d9b1bb4e86..9d27adfe1fe8f 100644 --- a/llvm/test/Transforms/LoopFusion/guarded.ll +++ b/llvm/test/Transforms/LoopFusion/guarded.ll @@ -390,3 +390,142 @@ for.2: ; preds = %for.cond13, %for.body6, exit: ; preds = %for.cond13 ret void } + +define void @foo(ptr noalias %A, ptr noalias %B, i64 %N) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i64 0, [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP4]], label [[BB3:%.*]], label [[BB14:%.*]] +; CHECK: bb3: +; CHECK-NEXT: br label [[BB5:%.*]] +; CHECK: bb5: +; CHECK-NEXT: [[I_05:%.*]] = phi i64 [ [[INC:%.*]], [[BB5]] ], [ 0, [[BB3]] ] +; CHECK-NEXT: [[I_0510:%.*]] = phi i64 [ [[INC10:%.*]], [[BB5]] ], [ 0, [[BB3]] ] +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[I_05]], 3 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[I_05]], 3 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[SUB]], [[ADD]] +; CHECK-NEXT: [[REM:%.*]] = srem i64 [[MUL]], [[I_05]] +; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[REM]] to i32 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I_05]] +; CHECK-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC]] = add nsw i64 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]] +; CHECK-NEXT: 
[[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[I_0510]] +; CHECK-NEXT: [[LOAD_B:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4 +; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_0510]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX11]], align 4 +; CHECK-NEXT: [[INC10]] = add nsw i64 [[I_0510]], 1 +; CHECK-NEXT: [[CMP10:%.*]] = icmp slt i64 [[INC10]], [[N]] +; CHECK-NEXT: br i1 [[CMP10]], label [[BB5]], label [[BB1010:%.*]] +; CHECK: bb1010: +; CHECK-NEXT: br label [[BB14]] +; CHECK: bb14: +; CHECK-NEXT: br i1 [[CMP4]], label [[BB8:%.*]], label [[BB12:%.*]] +; CHECK: bb8: +; CHECK-NEXT: br label [[BB9:%.*]] +; CHECK: bb9: +; CHECK-NEXT: [[I1_02:%.*]] = phi i64 [ [[INC14:%.*]], [[BB9]] ], [ 0, [[BB8]] ] +; CHECK-NEXT: [[SUB7:%.*]] = sub nsw i64 [[I1_02]], 3 +; CHECK-NEXT: [[ADD8:%.*]] = add nsw i64 [[I1_02]], 3 +; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i64 [[SUB7]], [[ADD8]] +; CHECK-NEXT: [[REM10:%.*]] = srem i64 [[MUL9]], [[I1_02]] +; CHECK-NEXT: [[CONV11:%.*]] = trunc i64 [[REM10]] to i32 +; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 [[I1_02]] +; CHECK-NEXT: store i32 [[CONV11]], ptr [[ARRAYIDX12]], align 4 +; CHECK-NEXT: [[INC14]] = add nsw i64 [[I1_02]], 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i64 [[INC14]], [[N]] +; CHECK-NEXT: br i1 [[CMP3]], label [[BB9]], label [[BB15:%.*]] +; CHECK: bb15: +; CHECK-NEXT: br label [[BB80:%.*]] +; CHECK: bb80: +; CHECK-NEXT: br label [[BB90:%.*]] +; CHECK: bb90: +; CHECK-NEXT: [[I1_0210:%.*]] = phi i64 [ [[INC1410:%.*]], [[BB90]] ], [ 0, [[BB80]] ] +; CHECK-NEXT: [[CONV12:%.*]] = trunc i64 [[I1_0210]] to i32 +; CHECK-NEXT: [[ARRAYIDX1210:%.*]] = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 [[I1_0210]] +; CHECK-NEXT: store i32 [[CONV12]], ptr [[ARRAYIDX1210]], align 4 +; CHECK-NEXT: [[INC1410]] = add nsw i64 [[I1_0210]], 1 +; CHECK-NEXT: [[CMP310:%.*]] = icmp slt i64 [[INC1410]], [[N]] +; CHECK-NEXT: br i1 [[CMP310]], label 
[[BB90]], label [[BB150:%.*]] +; CHECK: bb150: +; CHECK-NEXT: br label [[BB12]] +; CHECK: bb12: +; CHECK-NEXT: ret void +; +entry: + %cmp4 = icmp slt i64 0, %N + br i1 %cmp4, label %bb3, label %bb14 + +bb3: ; preds = %entry + br label %bb5 + +bb5: ; preds = %bb3, %bb5 + %i.05 = phi i64 [ %inc, %bb5 ], [ 0, %bb3 ] + %sub = sub nsw i64 %i.05, 3 + %add = add nsw i64 %i.05, 3 + %mul = mul nsw i64 %sub, %add + %rem = srem i64 %mul, %i.05 + %conv = trunc i64 %rem to i32 + %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.05 + store i32 %conv, ptr %arrayidx, align 4 + %inc = add nsw i64 %i.05, 1 + %cmp = icmp slt i64 %inc, %N + br i1 %cmp, label %bb5, label %bb103 + +bb103: ; preds = %bb5 + br label %bb105 + +bb105: ; preds = %bb103, %bb105 + %i.0510 = phi i64 [ %inc10, %bb105 ], [ 0, %bb103 ] + %arrayidx10 = getelementptr inbounds i32, ptr %B, i64 %i.0510 + %load.b = load i32, ptr %arrayidx10, align 4 + %arrayidx11 = getelementptr inbounds i32, ptr %A, i64 %i.0510 + store i32 0, ptr %arrayidx11, align 4 + %inc10 = add nsw i64 %i.0510, 1 + %cmp10 = icmp slt i64 %inc10, %N + br i1 %cmp10, label %bb105, label %bb1010 + +bb1010: ; preds = %bb105 + br label %bb14 + +bb14: ; preds = %bb1010, %entry + br i1 %cmp4, label %bb8, label %bb12 + +bb8: ; preds = %bb14 + br label %bb9 + +bb9: ; preds = %bb8, %bb9 + %i1.02 = phi i64 [ %inc14, %bb9 ], [ 0, %bb8 ] + %sub7 = sub nsw i64 %i1.02, 3 + %add8 = add nsw i64 %i1.02, 3 + %mul9 = mul nsw i64 %sub7, %add8 + %rem10 = srem i64 %mul9, %i1.02 + %conv11 = trunc i64 %rem10 to i32 + %arrayidx12 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %i1.02 + store i32 %conv11, ptr %arrayidx12, align 4 + %inc14 = add nsw i64 %i1.02, 1 + %cmp3 = icmp slt i64 %inc14, %N + br i1 %cmp3, label %bb9, label %bb15 + +bb15: ; preds = %bb9 + br label %bb80 + +bb80: ; preds = %bb15 + br label %bb90 + +bb90: ; preds = %bb80, %bb90 + %i1.0210 = phi i64 [ %inc1410, %bb90 ], [ 0, %bb80 ] + %conv12 = trunc i64 %i1.0210 to i32 + %arrayidx1210 = 
getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %i1.0210 + store i32 %conv12, ptr %arrayidx1210, align 4 + %inc1410 = add nsw i64 %i1.0210, 1 + %cmp310 = icmp slt i64 %inc1410, %N + br i1 %cmp310, label %bb90, label %bb150 + +bb150: ; preds = %bb90 + br label %bb12 + + +bb12: ; preds = %bb150, %bb14 + ret void +}