Skip to content

Commit

Permalink
[LV] Do not add load to group if it moves across conflicting store.
Browse files Browse the repository at this point in the history
This patch prevents invalid load groups from being formed, where a load
needs to be moved across a conflicting store.

Once we hit a store that conflicts with a load that is already part of an
interleave group, we need to stop adding earlier loads to that group:
adding an earlier load would force the loads already in the group to be
moved across the conflicting store.

To detect such cases, add a new CompletedLoadGroups set, which is used
to keep track of load groups to which no earlier loads can be added.

Fixes #63602

Reviewed By: anna

Differential Revision: https://reviews.llvm.org/D154309
  • Loading branch information
fhahn committed Jul 7, 2023
1 parent fc9821a commit 4d847bf
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 23 deletions.
20 changes: 19 additions & 1 deletion llvm/lib/Analysis/VectorUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1107,6 +1107,8 @@ void InterleavedAccessInfo::analyzeInterleaving(
SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
// Holds all interleaved load groups temporarily.
SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
// Groups added to this set cannot have new members added.
SmallPtrSet<InterleaveGroup<Instruction> *, 4> CompletedLoadGroups;

// Search in bottom-up program order for pairs of accesses (A and B) that can
// form interleaved load or store groups. In the algorithm below, access A
Expand Down Expand Up @@ -1139,8 +1141,12 @@ void InterleavedAccessInfo::analyzeInterleaving(
}
if (B->mayWriteToMemory())
StoreGroups.insert(Group);
else
else {
// Skip B if no new instructions can be added to its load group.
if (CompletedLoadGroups.contains(Group))
continue;
LoadGroups.insert(Group);
}
}

for (auto AI = std::next(BI); AI != E; ++AI) {
Expand Down Expand Up @@ -1181,6 +1187,18 @@ void InterleavedAccessInfo::analyzeInterleaving(
StoreGroups.remove(StoreGroup);
releaseGroup(StoreGroup);
}
// If B is a load and part of an interleave group, no earlier loads can
// be added to B's interleave group, because this would mean the load B
// would need to be moved across store A. Mark the interleave group as
// complete.
if (isInterleaved(B) && isa<LoadInst>(B)) {
InterleaveGroup<Instruction> *LoadGroup = getInterleaveGroup(B);

LLVM_DEBUG(dbgs() << "LV: Marking interleave group for " << *B
<< " as complete.\n");

CompletedLoadGroups.insert(LoadGroup);
}

// If a dependence exists and A is not already in a group (or it was
// and we just released it), B might be hoisted above A (if B is a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,38 +28,41 @@ define void @pr63602_1(ptr %arr) {
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX2]], 0
; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], 4
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -2
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP9]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[STRIDED_VEC4]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 0
; CHECK-NEXT: store i32 [[TMP14]], ptr [[TMP10]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[STRIDED_VEC4]], i32 1
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 1
; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[STRIDED_VEC4]], i32 2
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 2
; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[STRIDED_VEC4]], i32 3
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 3
; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP13]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = add <4 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC]]
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[TMP18]], i32 0
; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP10]], align 4
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP18]], i32 1
; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP18]], i32 2
; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP18]], i32 3
; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP13]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = add nuw nsw i64 [[TMP6]], 2
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0
; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load <12 x i32>, ptr [[TMP20]], align 4
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <12 x i32> [[WIDE_VEC3]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> [[WIDE_VEC3]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK-NEXT: [[TMP21:%.*]] = add <4 x i32> [[STRIDED_VEC5]], [[STRIDED_VEC4]]
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP21]], i32 0
; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP10]], align 4
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP21]], i32 1
; CHECK-NEXT: store i32 [[TMP23]], ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP21]], i32 2
; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP21]], i32 3
; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP13]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 17, 16
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 49, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 52, [[MIDDLE_BLOCK]] ], [ 4, [[ENTRY]] ]
Expand All @@ -81,7 +84,7 @@ define void @pr63602_1(ptr %arr) {
; CHECK-NEXT: store i32 [[ADD]], ptr [[GEP_IV_2]], align 4
; CHECK-NEXT: [[IV_2_NEXT]] = add nuw nsw i64 [[IV_2]], 3
; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i64 [[IV_2]], 50
; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
Expand Down Expand Up @@ -110,4 +113,3 @@ loop:
exit:
ret void
}

0 comments on commit 4d847bf

Please sign in to comment.