Skip to content

Commit

Permalink
[CodeGen] Generate llvm.loop.parallel_accesses instead of llvm.mem.pa…
Browse files Browse the repository at this point in the history
…rallel_loop_access metadata.

Instead of generating llvm.mem.parallel_loop_access metadata, generate
llvm.access.group on instructions and llvm.loop.parallel_accesses on
loops. There is one access group per generated loop.

This is clang part of D52116/r349725.

Differential Revision: https://reviews.llvm.org/D52117

llvm-svn: 349823
  • Loading branch information
Meinersbur committed Dec 20, 2018
1 parent a6b9c68 commit 0535137
Show file tree
Hide file tree
Showing 17 changed files with 326 additions and 262 deletions.
40 changes: 24 additions & 16 deletions clang/lib/CodeGen/CGLoopInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ using namespace llvm;

static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs,
const llvm::DebugLoc &StartLoc,
const llvm::DebugLoc &EndLoc) {
const llvm::DebugLoc &EndLoc, MDNode *&AccGroup) {

if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 &&
Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 &&
Expand Down Expand Up @@ -122,6 +122,12 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs,
Args.push_back(MDNode::get(Ctx, Vals));
}

if (Attrs.IsParallel) {
AccGroup = MDNode::getDistinct(Ctx, {});
Args.push_back(MDNode::get(
Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccGroup}));
}

// Set the first operand to itself.
MDNode *LoopID = MDNode::get(Ctx, Args);
LoopID->replaceOperandWith(0, LoopID);
Expand Down Expand Up @@ -150,7 +156,8 @@ void LoopAttributes::clear() {
LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc)
: LoopID(nullptr), Header(Header), Attrs(Attrs) {
LoopID = createMetadata(Header->getContext(), Attrs, StartLoc, EndLoc);
LoopID =
createMetadata(Header->getContext(), Attrs, StartLoc, EndLoc, AccGroup);
}

void LoopInfoStack::push(BasicBlock *Header, const llvm::DebugLoc &StartLoc,
Expand Down Expand Up @@ -328,6 +335,21 @@ void LoopInfoStack::pop() {
}

void LoopInfoStack::InsertHelper(Instruction *I) const {
if (I->mayReadOrWriteMemory()) {
SmallVector<Metadata *, 4> AccessGroups;
for (const LoopInfo &AL : Active) {
// Here we assume that every loop that has an access group is parallel.
if (MDNode *Group = AL.getAccessGroup())
AccessGroups.push_back(Group);
}
MDNode *UnionMD = nullptr;
if (AccessGroups.size() == 1)
UnionMD = cast<MDNode>(AccessGroups[0]);
else if (AccessGroups.size() >= 2)
UnionMD = MDNode::get(I->getContext(), AccessGroups);
I->setMetadata("llvm.access.group", UnionMD);
}

if (!hasInfo())
return;

Expand All @@ -343,18 +365,4 @@ void LoopInfoStack::InsertHelper(Instruction *I) const {
}
return;
}

if (I->mayReadOrWriteMemory()) {
SmallVector<Metadata *, 2> ParallelLoopIDs;
for (const LoopInfo &AL : Active)
if (AL.getAttributes().IsParallel)
ParallelLoopIDs.push_back(AL.getLoopID());

MDNode *ParallelMD = nullptr;
if (ParallelLoopIDs.size() == 1)
ParallelMD = cast<MDNode>(ParallelLoopIDs[0]);
else if (ParallelLoopIDs.size() >= 2)
ParallelMD = MDNode::get(I->getContext(), ParallelLoopIDs);
I->setMetadata("llvm.mem.parallel_loop_access", ParallelMD);
}
}
5 changes: 5 additions & 0 deletions clang/lib/CodeGen/CGLoopInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,18 @@ class LoopInfo {
/// Get the set of attributes active for this loop.
const LoopAttributes &getAttributes() const { return Attrs; }

/// Return this loop's access group or nullptr if it does not have one.
llvm::MDNode *getAccessGroup() const { return AccGroup; }

private:
/// Loop ID metadata.
llvm::MDNode *LoopID;
/// Header block of this loop.
llvm::BasicBlock *Header;
/// The attributes for this loop.
LoopAttributes Attrs;
/// The access group for memory accesses parallel to this loop.
llvm::MDNode *AccGroup = nullptr;
};

/// A stack of loop information corresponding to loop nesting levels.
Expand Down
32 changes: 32 additions & 0 deletions clang/test/CodeGenCXX/pragma-loop-safety-imperfectly_nested.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck %s

// Verify that the outer loop has the llvm.access.group property for the
// accesses outside and inside the inner loop, even when the inner loop
// is not perfectly nested.
void vectorize_imperfectly_nested_test(int *List, int Length) {
#pragma clang loop vectorize(assume_safety) interleave(disable) unroll(disable)
for (int i = 0; i < Length; ++i) {
List[i * Length] = 42;
#pragma clang loop vectorize(assume_safety) interleave(disable) unroll(disable)
for (int j = 1; j < Length - 1; ++j)
List[i * Length + j] = (i + j) * 2;
List[(i + 1) * Length - 1] = 21;
}
}


// CHECK: load i32, i32* %Length.addr, align 4, !llvm.access.group ![[ACCESS_GROUP_2:[0-9]+]]

// CHECK: %[[MUL:.+]] = mul nsw i32 %add, 2
// CHECK: store i32 %[[MUL]], i32* %{{.+}}, !llvm.access.group ![[ACCESS_GROUP_3:[0-9]+]]
// CHECK: br label %{{.+}}, !llvm.loop ![[INNER_LOOPID:[0-9]+]]
// CHECK: store i32 21, i32* %{{.+}}, !llvm.access.group ![[ACCESS_GROUP_2]]
// CHECK: br label %{{.+}}, !llvm.loop ![[OUTER_LOOPID:[0-9]+]]

// CHECK: ![[ACCESS_GROUP_2]] = distinct !{}
// CHECK: ![[ACCESS_GROUP_LIST_3:[0-9]+]] = !{![[ACCESS_GROUP_2]], ![[ACCESS_GROUP_4:[0-9]+]]}
// CHECK: ![[ACCESS_GROUP_4]] = distinct !{}
// CHECK: ![[INNER_LOOPID]] = distinct !{![[INNER_LOOPID]], {{.*}} ![[PARALLEL_ACCESSES_8:[0-9]+]]}
// CHECK: ![[PARALLEL_ACCESSES_8]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_4]]}
// CHECK: ![[OUTER_LOOPID]] = distinct !{![[OUTER_LOOPID]], {{.*}} ![[PARALLEL_ACCESSES_10:[0-9]+]]}
// CHECK: ![[PARALLEL_ACCESSES_10]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_2]]}
18 changes: 12 additions & 6 deletions clang/test/CodeGenCXX/pragma-loop-safety-nested.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck %s

// Verify that the inner access is tagged with a parallel_loop_access
// for the inner and outer loop using a list.
// Verify that the outer loop has the llvm.access.group property for the
// accesses outside and inside the inner loop.
void vectorize_nested_test(int *List, int Length) {
#pragma clang loop vectorize(assume_safety) interleave(disable) unroll(disable)
for (int i = 0; i < Length; ++i) {
Expand All @@ -11,11 +11,17 @@ void vectorize_nested_test(int *List, int Length) {
}
}


// CHECK: load i32, i32* %Length.addr, align 4, !llvm.access.group ![[ACCESS_GROUP_2:[0-9]+]]
// CHECK: %[[MUL:.+]] = mul
// CHECK: store i32 %[[MUL]], i32* %{{.+}}, !llvm.mem.parallel_loop_access ![[PARALLEL_LIST:[0-9]+]]
// CHECK: store i32 %[[MUL]], i32* %{{.+}}, !llvm.access.group ![[ACCESS_GROUP_LIST_3:[0-9]+]]
// CHECK: br label %{{.+}}, !llvm.loop ![[INNER_LOOPID:[0-9]+]]
// CHECK: br label %{{.+}}, !llvm.loop ![[OUTER_LOOPID:[0-9]+]]

// CHECK: ![[OUTER_LOOPID]] = distinct !{![[OUTER_LOOPID]],
// CHECK: ![[PARALLEL_LIST]] = !{![[OUTER_LOOPID]], ![[INNER_LOOPID]]}
// CHECK: ![[INNER_LOOPID]] = distinct !{![[INNER_LOOPID]],
// CHECK: ![[ACCESS_GROUP_2]] = distinct !{}
// CHECK: ![[ACCESS_GROUP_LIST_3]] = !{![[ACCESS_GROUP_2]], ![[ACCESS_GROUP_4:[0-9]+]]}
// CHECK: ![[ACCESS_GROUP_4]] = distinct !{}
// CHECK: ![[INNER_LOOPID]] = distinct !{![[INNER_LOOPID]], {{.*}} ![[PARALLEL_ACCESSES_8:[0-9]+]]}
// CHECK: ![[PARALLEL_ACCESSES_8]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_4]]}
// CHECK: ![[OUTER_LOOPID]] = distinct !{![[OUTER_LOOPID]], {{.*}} ![[PARALLEL_ACCESSES_10:[0-9]+]]}
// CHECK: ![[PARALLEL_ACCESSES_10]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_2]]}
12 changes: 7 additions & 5 deletions clang/test/CodeGenCXX/pragma-loop-safety-outer.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck %s

// Verify that the inner access is tagged with a parallel_loop_access
// for the outer loop.
// Verify that the outer loop has the inner loop's access in its
// llvm.loop.parallel_accesses property.
void vectorize_outer_test(int *List, int Length) {
#pragma clang loop vectorize(assume_safety) interleave(disable) unroll(disable)
for (int i = 0; i < Length; i += 2) {
Expand All @@ -12,9 +12,11 @@ void vectorize_outer_test(int *List, int Length) {
}

// CHECK: %[[MUL:.+]] = mul
// CHECK: store i32 %[[MUL]], i32* %{{.+}}, !llvm.mem.parallel_loop_access ![[OUTER_LOOPID:[0-9]+]]
// CHECK: store i32 %[[MUL]], i32* %{{.+}}, !llvm.access.group ![[ACCESS_GROUP_2:[0-9]+]]
// CHECK: br label %{{.+}}, !llvm.loop ![[INNER_LOOPID:[0-9]+]]
// CHECK: br label %{{.+}}, !llvm.loop ![[OUTER_LOOPID]]
// CHECK: br label %{{.+}}, !llvm.loop ![[OUTER_LOOPID:[0-9]+]]

// CHECK: ![[OUTER_LOOPID]] = distinct !{![[OUTER_LOOPID]],
// CHECK: ![[ACCESS_GROUP_2]] = distinct !{}
// CHECK: ![[INNER_LOOPID]] = distinct !{![[INNER_LOOPID]],
// CHECK: ![[OUTER_LOOPID]] = distinct !{![[OUTER_LOOPID]], {{.*}} ![[PARALLEL_ACCESSES_9:[0-9]+]]}
// CHECK: ![[PARALLEL_ACCESSES_9]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_2]]}
32 changes: 18 additions & 14 deletions clang/test/CodeGenCXX/pragma-loop-safety.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,19 @@
// Verify assume_safety vectorization is recognized.
void vectorize_test(int *List, int Length) {
// CHECK: define {{.*}} @_Z14vectorize_test
// CHECK: [[LOAD1_IV:.+]] = load i32, i32* [[IV1:[^,]+]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP1_ID:[0-9]+]]
// CHECK-NEXT: [[LOAD1_LEN:.+]] = load i32, i32* [[LEN1:.+]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP1_ID]]
// CHECK: [[LOAD1_IV:.+]] = load i32, i32* [[IV1:[^,]+]], {{.*}}!llvm.access.group ![[ACCESS_GROUP_2:[0-9]+]]
// CHECK-NEXT: [[LOAD1_LEN:.+]] = load i32, i32* [[LEN1:.+]], {{.*}}!llvm.access.group ![[ACCESS_GROUP_2]]
// CHECK-NEXT: [[CMP1:.+]] = icmp slt i32[[LOAD1_IV]],[[LOAD1_LEN]]
// CHECK-NEXT: br i1[[CMP1]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
#pragma clang loop vectorize(assume_safety) interleave(disable) unroll(disable)
for (int i = 0; i < Length; i++) {
// CHECK: [[RHIV1:.+]] = load i32, i32* [[IV1]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP1_ID]]
// CHECK: [[RHIV1:.+]] = load i32, i32* [[IV1]], {{.*}}!llvm.access.group ![[ACCESS_GROUP_2]]
// CHECK-DAG: [[CALC1:.+]] = mul nsw i32[[RHIV1]], 2
// CHECK-DAG: [[SIV1:.+]] = load i32, i32* [[IV1]]{{.*}}!llvm.mem.parallel_loop_access ![[LOOP1_ID]]
// CHECK-DAG: [[SIV1:.+]] = load i32, i32* [[IV1]]{{.*}}!llvm.access.group ![[ACCESS_GROUP_2]]
// CHECK-DAG: [[INDEX1:.+]] = sext i32[[SIV1]] to i64
// CHECK-DAG: [[ARRAY1:.+]] = load i32*, i32** [[LIST1:.*]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP1_ID]]
// CHECK-DAG: [[ARRAY1:.+]] = load i32*, i32** [[LIST1:.*]], {{.*}}!llvm.access.group ![[ACCESS_GROUP_2]]
// CHECK-DAG: [[PTR1:.+]] = getelementptr inbounds i32, i32*[[ARRAY1]], i64[[INDEX1]]
// CHECK: store i32[[CALC1]], i32*[[PTR1]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP1_ID]]
// CHECK: store i32[[CALC1]], i32*[[PTR1]], {{.*}}!llvm.access.group ![[ACCESS_GROUP_2]]
// CHECK-NEXT: br label [[LOOP1_INC:[^,]+]]
List[i] = i * 2;

Expand All @@ -26,29 +26,33 @@ void vectorize_test(int *List, int Length) {
// Verify assume_safety interleaving is recognized.
void interleave_test(int *List, int Length) {
// CHECK: define {{.*}} @_Z15interleave_test
// CHECK: [[LOAD2_IV:.+]] = load i32, i32* [[IV2:[^,]+]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP2_ID:[0-9]+]]
// CHECK-NEXT: [[LOAD2_LEN:.+]] = load i32, i32* [[LEN2:.+]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP2_ID]]
// CHECK: [[LOAD2_IV:.+]] = load i32, i32* [[IV2:[^,]+]], {{.*}}!llvm.access.group ![[ACCESS_GROUP_8:[0-9]+]]
// CHECK-NEXT: [[LOAD2_LEN:.+]] = load i32, i32* [[LEN2:.+]], {{.*}}!llvm.access.group ![[ACCESS_GROUP_8]]
// CHECK-NEXT: [[CMP2:.+]] = icmp slt i32[[LOAD2_IV]],[[LOAD2_LEN]]
// CHECK-NEXT: br i1[[CMP2]], label %[[LOOP2_BODY:[^,]+]], label %[[LOOP2_END:[^,]+]]
#pragma clang loop interleave(assume_safety) vectorize(disable) unroll(disable)
for (int i = 0; i < Length; i++) {
// CHECK: [[RHIV2:.+]] = load i32, i32* [[IV2]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP2_ID]]
// CHECK: [[RHIV2:.+]] = load i32, i32* [[IV2]], {{.*}}!llvm.access.group ![[ACCESS_GROUP_8]]
// CHECK-DAG: [[CALC2:.+]] = mul nsw i32[[RHIV2]], 2
// CHECK-DAG: [[SIV2:.+]] = load i32, i32* [[IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[LOOP2_ID]]
// CHECK-DAG: [[SIV2:.+]] = load i32, i32* [[IV2]]{{.*}}!llvm.access.group ![[ACCESS_GROUP_8]]
// CHECK-DAG: [[INDEX2:.+]] = sext i32[[SIV2]] to i64
// CHECK-DAG: [[ARRAY2:.+]] = load i32*, i32** [[LIST2:.*]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP2_ID]]
// CHECK-DAG: [[ARRAY2:.+]] = load i32*, i32** [[LIST2:.*]], {{.*}}!llvm.access.group ![[ACCESS_GROUP_8]]
// CHECK-DAG: [[PTR2:.+]] = getelementptr inbounds i32, i32*[[ARRAY2]], i64[[INDEX2]]
// CHECK: store i32[[CALC2]], i32*[[PTR2]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP2_ID]]
// CHECK: store i32[[CALC2]], i32*[[PTR2]], {{.*}}!llvm.access.group ![[ACCESS_GROUP_8]]
// CHECK-NEXT: br label [[LOOP2_INC:[^,]+]]
List[i] = i * 2;

// CHECK: br label [[LOOP2_COND:[^,]+]], !llvm.loop ![[LOOP2_HINTS:[0-9]+]]
}
}

// CHECK: ![[LOOP1_HINTS]] = distinct !{![[LOOP1_HINTS]], ![[INTERLEAVE_1:[0-9]+]], ![[INTENABLE_1:[0-9]+]], ![[UNROLL_DISABLE:[0-9]+]]}
// CHECK: ![[ACCESS_GROUP_2]] = distinct !{}
// CHECK: ![[LOOP1_HINTS]] = distinct !{![[LOOP1_HINTS]], ![[INTERLEAVE_1:[0-9]+]], ![[INTENABLE_1:[0-9]+]], ![[UNROLL_DISABLE:[0-9]+]], ![[PARALLEL_ACCESSES_7:[0-9]+]]}
// CHECK: ![[INTERLEAVE_1]] = !{!"llvm.loop.interleave.count", i32 1}
// CHCCK: ![[INTENABLE_1]] = !{!"llvm.loop.vectorize.enable", i1 true}
// CHECK: ![[UNROLL_DISABLE]] = !{!"llvm.loop.unroll.disable"}
// CHECK: ![[LOOP2_HINTS]] = distinct !{![[LOOP2_HINTS]], ![[WIDTH_1:[0-9]+]], ![[INTENABLE_1]], ![[UNROLL_DISABLE]]}
// CHECK: ![[PARALLEL_ACCESSES_7]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_2]]}
// CHECK: ![[ACCESS_GROUP_8]] = distinct !{}
// CHECK: ![[LOOP2_HINTS]] = distinct !{![[LOOP2_HINTS]], ![[WIDTH_1:[0-9]+]], ![[INTENABLE_1]], ![[UNROLL_DISABLE]], ![[PARALLEL_ACCESSES_11:[0-9]+]]}
// CHECK: ![[WIDTH_1]] = !{!"llvm.loop.vectorize.width", i32 1}
// CHECK: ![[PARALLEL_ACCESSES_11]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_8]]}
14 changes: 7 additions & 7 deletions clang/test/OpenMP/for_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) {
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
// CHECK-NOT: !llvm.mem.parallel_loop_access
// CHECK-NOT: !llvm.access.group
a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
Expand Down Expand Up @@ -114,7 +114,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
// CHECK-NOT: !llvm.mem.parallel_loop_access
// CHECK-NOT: !llvm.access.group
a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
Expand Down Expand Up @@ -163,7 +163,7 @@ void static_chunked(float *a, float *b, float *c, float *d) {
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
// CHECK-NOT: !llvm.mem.parallel_loop_access
// CHECK-NOT: !llvm.access.group
a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add i32 [[IV1_2]], 1
Expand Down Expand Up @@ -215,7 +215,7 @@ void dynamic1(float *a, float *b, float *c, float *d) {
// CHECK-NEXT: store i64 [[CALC_I_2]], i64* [[LC_I:.+]]
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}!llvm.mem.parallel_loop_access
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}!llvm.access.group
a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add i64 [[IV1_2]], 1
Expand Down Expand Up @@ -256,7 +256,7 @@ void guided7(float *a, float *b, float *c, float *d) {
// CHECK-NEXT: store i64 [[CALC_I_2]], i64* [[LC_I:.+]]
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}!llvm.mem.parallel_loop_access
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}!llvm.access.group
a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add i64 [[IV1_2]], 1
Expand Down Expand Up @@ -301,7 +301,7 @@ void test_auto(float *a, float *b, float *c, float *d) {
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
// CHECK-NOT: !llvm.mem.parallel_loop_access
// CHECK-NOT: !llvm.access.group
a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i64 [[IV1_2]], 1
Expand Down Expand Up @@ -343,7 +343,7 @@ void runtime(float *a, float *b, float *c, float *d) {
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
// CHECK-NOT: !llvm.mem.parallel_loop_access
// CHECK-NOT: !llvm.access.group
a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
Expand Down
16 changes: 8 additions & 8 deletions clang/test/OpenMP/for_simd_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,31 +73,31 @@ void simple(float *a, float *b, float *c, float *d) {
// CHECK: [[LB_VAL:%.+]] = load i32, i32* [[LB]],
// CHECK: store i32 [[LB_VAL]], i32* [[OMP_IV2:%[^,]+]],

// CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID:[0-9]+]]
// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
// CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.access.group
// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[CMP2:%.+]] = icmp sle i32 [[IV2]], [[UB_VAL]]
// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP2_BODY:.+]], label %[[SIMPLE_LOOP2_END:[^,]+]]
for (int i = 10; i > 1; i--) {
// CHECK: [[SIMPLE_LOOP2_BODY]]:
// Start of body: calculate i from IV:
// CHECK: [[IV2_0:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
// CHECK: [[IV2_0:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.access.group
// FIXME: It is interesting, why the following "mul 1" was not constant folded?
// CHECK-NEXT: [[IV2_1:%.+]] = mul nsw i32 [[IV2_0]], 1
// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV2_1]]
// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.access.group
//
// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
// CHECK-NEXT: [[IV2_2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[IV2_2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV2_2]], 3
// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
// Update of the privatized version of linear variable!
// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
a[k]++;
k = k + 3;
// CHECK: [[IV2_2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
// CHECK: [[IV2_2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV2_2]], 1
// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV2]]{{.*}}!llvm.access.group
// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP2_ID]]
}
// CHECK: [[SIMPLE_LOOP2_END]]:
Expand Down
Loading

0 comments on commit 0535137

Please sign in to comment.