Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions llvm/lib/Transforms/Scalar/LoopFuse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -406,16 +406,14 @@ struct FusionCandidateCompare {

// Do this compare first so if LHS == RHS, function returns false.
if (DT->dominates(RHSEntryBlock, LHSEntryBlock)) {
// RHS dominates LHS
// Verify LHS post-dominates RHS
assert(LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock));
return false;
// RHS dominates LHS.
// Check if LHS post-dominates RHS.
return !LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock);
Copy link
Contributor

@amehsan amehsan Nov 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure what the implications are of violating control-flow equivalence of the loops in the candidate set. There is a comment in the code that says members of FusionCandidateSet are control-flow equivalent, but I haven't looked into it enough to know what happens if we violate that condition. @1997alireza @CongzheUalberta Do you know about this?

Your comment says that, while post-domination is required for fusing candidates, it is not required for adding something to the candidate set. Does this mean there is another check of the post-dominance relationship later on?

I was going to post a comment on this issue and the one extracted from it, but I have been very busy with DA stuff. I will try to post a comment soon (but I may need to read some parts of the fusion code first, if the DA stuff leaves me enough time).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> your comment says, while post-domination is required for fusing candidates, it is not required for adding something to the candidate set. Does this mean there is another check of post-dominance relationship later on?

I believe 'isAdjacent' is doing the same thing. In fact, 'isAdjacent' is stricter than saying the two loops are control-flow equivalent.

}

if (DT->dominates(LHSEntryBlock, RHSEntryBlock)) {
// Verify RHS Postdominates LHS
assert(LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock));
return true;
// Check if RHS postdominates LHS.
return LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock);
}

// If two FusionCandidates are in the same level of dominator tree,
Expand Down
139 changes: 139 additions & 0 deletions llvm/test/Transforms/LoopFusion/guarded.ll
Original file line number Diff line number Diff line change
Expand Up @@ -390,3 +390,142 @@ for.2: ; preds = %for.cond13, %for.body6,
exit: ; preds = %for.cond13
ret void
}

; @foo contains two regions, each guarded by the same condition %cmp4
; (0 < %N). The first region holds two consecutive loops (bb5, then bb105);
; the second holds two more (bb9, then bb90). The CHECK lines below expect the
; bodies of bb5 and bb105 to end up in a single loop (bb5), while bb9 and bb90
; remain separate loops.
define void @foo(ptr noalias %A, ptr noalias %B, i64 %N) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i64 0, [[N:%.*]]
; CHECK-NEXT: br i1 [[CMP4]], label [[BB3:%.*]], label [[BB14:%.*]]
; CHECK: bb3:
; CHECK-NEXT: br label [[BB5:%.*]]
; CHECK: bb5:
; CHECK-NEXT: [[I_05:%.*]] = phi i64 [ [[INC:%.*]], [[BB5]] ], [ 0, [[BB3]] ]
; CHECK-NEXT: [[I_0510:%.*]] = phi i64 [ [[INC10:%.*]], [[BB5]] ], [ 0, [[BB3]] ]
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[I_05]], 3
; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[I_05]], 3
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[SUB]], [[ADD]]
; CHECK-NEXT: [[REM:%.*]] = srem i64 [[MUL]], [[I_05]]
; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[REM]] to i32
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I_05]]
; CHECK-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC]] = add nsw i64 [[I_05]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]]
; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[I_0510]]
; CHECK-NEXT: [[LOAD_B:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_0510]]
; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX11]], align 4
; CHECK-NEXT: [[INC10]] = add nsw i64 [[I_0510]], 1
; CHECK-NEXT: [[CMP10:%.*]] = icmp slt i64 [[INC10]], [[N]]
; CHECK-NEXT: br i1 [[CMP10]], label [[BB5]], label [[BB1010:%.*]]
; CHECK: bb1010:
; CHECK-NEXT: br label [[BB14]]
; CHECK: bb14:
; CHECK-NEXT: br i1 [[CMP4]], label [[BB8:%.*]], label [[BB12:%.*]]
; CHECK: bb8:
; CHECK-NEXT: br label [[BB9:%.*]]
; CHECK: bb9:
; CHECK-NEXT: [[I1_02:%.*]] = phi i64 [ [[INC14:%.*]], [[BB9]] ], [ 0, [[BB8]] ]
; CHECK-NEXT: [[SUB7:%.*]] = sub nsw i64 [[I1_02]], 3
; CHECK-NEXT: [[ADD8:%.*]] = add nsw i64 [[I1_02]], 3
; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i64 [[SUB7]], [[ADD8]]
; CHECK-NEXT: [[REM10:%.*]] = srem i64 [[MUL9]], [[I1_02]]
; CHECK-NEXT: [[CONV11:%.*]] = trunc i64 [[REM10]] to i32
; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 [[I1_02]]
; CHECK-NEXT: store i32 [[CONV11]], ptr [[ARRAYIDX12]], align 4
; CHECK-NEXT: [[INC14]] = add nsw i64 [[I1_02]], 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i64 [[INC14]], [[N]]
; CHECK-NEXT: br i1 [[CMP3]], label [[BB9]], label [[BB15:%.*]]
; CHECK: bb15:
; CHECK-NEXT: br label [[BB80:%.*]]
; CHECK: bb80:
; CHECK-NEXT: br label [[BB90:%.*]]
; CHECK: bb90:
; CHECK-NEXT: [[I1_0210:%.*]] = phi i64 [ [[INC1410:%.*]], [[BB90]] ], [ 0, [[BB80]] ]
; CHECK-NEXT: [[CONV12:%.*]] = trunc i64 [[I1_0210]] to i32
; CHECK-NEXT: [[ARRAYIDX1210:%.*]] = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 [[I1_0210]]
; CHECK-NEXT: store i32 [[CONV12]], ptr [[ARRAYIDX1210]], align 4
; CHECK-NEXT: [[INC1410]] = add nsw i64 [[I1_0210]], 1
; CHECK-NEXT: [[CMP310:%.*]] = icmp slt i64 [[INC1410]], [[N]]
; CHECK-NEXT: br i1 [[CMP310]], label [[BB90]], label [[BB150:%.*]]
; CHECK: bb150:
; CHECK-NEXT: br label [[BB12]]
; CHECK: bb12:
; CHECK-NEXT: ret void
;
entry:
; Guard for the first pair of loops: when 0 < %N is false, jump straight to
; bb14 and skip both bb5 and bb105.
%cmp4 = icmp slt i64 0, %N
br i1 %cmp4, label %bb3, label %bb14

bb3: ; preds = %entry
br label %bb5

; First loop: stores a value derived from the induction variable into %A[i].
; NOTE(review): %i.05 is 0 on the first iteration, so the srem below has a zero
; divisor there -- presumably acceptable for a transform-only test; confirm.
bb5: ; preds = %bb3, %bb5
%i.05 = phi i64 [ %inc, %bb5 ], [ 0, %bb3 ]
%sub = sub nsw i64 %i.05, 3
%add = add nsw i64 %i.05, 3
%mul = mul nsw i64 %sub, %add
%rem = srem i64 %mul, %i.05
%conv = trunc i64 %rem to i32
%arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.05
store i32 %conv, ptr %arrayidx, align 4
%inc = add nsw i64 %i.05, 1
%cmp = icmp slt i64 %inc, %N
br i1 %cmp, label %bb5, label %bb103

; Exit of the first loop; falls directly into the second loop.
bb103: ; preds = %bb5
br label %bb105

; Second loop: loads %B[i] (the loaded value is not used in this function) and
; stores 0 into %A[i]. It uses the same bound %N as the first loop.
bb105: ; preds = %bb103, %bb105
%i.0510 = phi i64 [ %inc10, %bb105 ], [ 0, %bb103 ]
%arrayidx10 = getelementptr inbounds i32, ptr %B, i64 %i.0510
%load.b = load i32, ptr %arrayidx10, align 4
%arrayidx11 = getelementptr inbounds i32, ptr %A, i64 %i.0510
store i32 0, ptr %arrayidx11, align 4
%inc10 = add nsw i64 %i.0510, 1
%cmp10 = icmp slt i64 %inc10, %N
br i1 %cmp10, label %bb105, label %bb1010

bb1010: ; preds = %bb105
br label %bb14

; Guard for the second pair of loops; reuses the same condition %cmp4.
bb14: ; preds = %bb1010, %entry
br i1 %cmp4, label %bb8, label %bb12

bb8: ; preds = %bb14
br label %bb9

; Third loop: stores a value derived from the induction variable into @B[i].
; @B is a global referenced as [1024 x i32]; its definition is outside this
; function.
bb9: ; preds = %bb8, %bb9
%i1.02 = phi i64 [ %inc14, %bb9 ], [ 0, %bb8 ]
%sub7 = sub nsw i64 %i1.02, 3
%add8 = add nsw i64 %i1.02, 3
%mul9 = mul nsw i64 %sub7, %add8
%rem10 = srem i64 %mul9, %i1.02
%conv11 = trunc i64 %rem10 to i32
%arrayidx12 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %i1.02
store i32 %conv11, ptr %arrayidx12, align 4
%inc14 = add nsw i64 %i1.02, 1
%cmp3 = icmp slt i64 %inc14, %N
br i1 %cmp3, label %bb9, label %bb15

; Two straight-line blocks (bb15, bb80) sit between the third loop's exit and
; the fourth loop's header.
bb15: ; preds = %bb9
br label %bb80

bb80: ; preds = %bb15
br label %bb90

; Fourth loop: stores the truncated induction variable into @B[i].
bb90: ; preds = %bb80, %bb90
%i1.0210 = phi i64 [ %inc1410, %bb90 ], [ 0, %bb80 ]
%conv12 = trunc i64 %i1.0210 to i32
%arrayidx1210 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %i1.0210
store i32 %conv12, ptr %arrayidx1210, align 4
%inc1410 = add nsw i64 %i1.0210, 1
%cmp310 = icmp slt i64 %inc1410, %N
br i1 %cmp310, label %bb90, label %bb150

bb150: ; preds = %bb90
br label %bb12


; Common exit: reached from bb150 after the fourth loop, or directly from bb14
; when the second guard is false.
bb12: ; preds = %bb150, %bb14
ret void
}
Loading