-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[VPlan] Hoist loads with invariant addresses using noalias metadata. #166247
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2cbe52b
6f781f2
3d4eb1b
5ad89c4
60e8cd5
9141570
9dea1f4
dd49d9b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,7 @@ | |
| #include "llvm/Support/Compiler.h" | ||
|
|
||
| namespace llvm { | ||
| class MemoryLocation; | ||
| class ScalarEvolution; | ||
| class SCEV; | ||
| } // namespace llvm | ||
|
|
@@ -74,6 +75,11 @@ getRecipesForUncountableExit(VPlan &Plan, | |
| SmallVectorImpl<VPRecipeBase *> &Recipes, | ||
| SmallVectorImpl<VPRecipeBase *> &GEPs); | ||
|
|
||
| /// Return a MemoryLocation for \p R with noalias metadata populated from | ||
| /// \p R, if the recipe is supported and std::nullopt otherwise. The pointer of | ||
| /// the location is conservatively set to nullptr. | ||
| std::optional<MemoryLocation> getMemoryLocation(const VPRecipeBase &R); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it worth adding a comment saying something like
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks! |
||
|
|
||
| /// Extracts and returns NoWrap and FastMath flags from the induction binop in | ||
| /// \p ID. | ||
| inline VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -132,15 +132,15 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { | |
| ; DEFAULT: vector.ph: | ||
| ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0 | ||
| ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer | ||
| ; DEFAULT-NEXT: [[TMP0:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT]] to <16 x i8> | ||
| ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] | ||
| ; DEFAULT: vector.body: | ||
| ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] | ||
| ; DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6:![0-9]+]] | ||
| ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i64> poison, i64 [[TMP1]], i64 0 | ||
| ; DEFAULT-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT2]], <16 x i64> poison, <16 x i32> zeroinitializer | ||
| ; DEFAULT-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT3]] to <16 x i8> | ||
| ; DEFAULT-NEXT: [[TMP0:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT]] to <16 x i8> | ||
| ; DEFAULT-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], [[TMP0]] | ||
| ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] | ||
| ; DEFAULT: vector.body: | ||
| ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] | ||
| ; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] | ||
| ; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16 | ||
| ; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]] | ||
|
|
@@ -156,15 +156,15 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { | |
| ; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] | ||
| ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <8 x i16> poison, i16 [[X]], i64 0 | ||
| ; DEFAULT-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT4]], <8 x i16> poison, <8 x i32> zeroinitializer | ||
| ; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8> | ||
| ; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] | ||
| ; DEFAULT: vec.epilog.vector.body: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This transformation seems to be taking place without any noalias metadata. I wonder if it's worth splitting the PR up into two parts - a first patch that does the optimisation without using the metadata, and a follow-on to use the metadata?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should only ever do the transformation using noalias metadata (the pointer of MemoryLocation is set to nullptr, so cannot be used to determine noalias). The way the diff is highligthed here makes it a bit difficult to see, but we hoist The only store in the loop (
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, I must be missing something because I don't see any metadata on the original scalar loop: and the function pointer arguments do not have Is it the loop vectoriser itself that is first adding the metadata, then using it to perform the transformation?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, this is using the noalias metadata the loop-vectorizer generates when emitting memory runtime checks; it will also work for cases where we already have !noalias in the scalar loop, but those cases are much less interesting I think, as then other transformations will do the hoisting earlier.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK that makes sense, thanks for explaining! |
||
| ; DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] | ||
| ; DEFAULT-NEXT: [[TMP8:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6]] | ||
| ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i64> poison, i64 [[TMP8]], i64 0 | ||
| ; DEFAULT-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT7]], <8 x i64> poison, <8 x i32> zeroinitializer | ||
| ; DEFAULT-NEXT: [[TMP9:%.*]] = trunc <8 x i64> [[BROADCAST_SPLAT8]] to <8 x i8> | ||
| ; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8> | ||
| ; DEFAULT-NEXT: [[TMP10:%.*]] = and <8 x i8> [[TMP9]], [[TMP7]] | ||
| ; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] | ||
| ; DEFAULT: vec.epilog.vector.body: | ||
| ; DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] | ||
| ; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX6]] | ||
| ; DEFAULT-NEXT: store <8 x i8> [[TMP10]], ptr [[TMP11]], align 1, !alias.scope [[META9]], !noalias [[META6]] | ||
| ; DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 8 | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it matter if there is more than one entry of kind
Kindin the list? Is it worth updating the comments above the function to say something like "this returns the first instance of kind \p Kind ..."?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There must be at most one entry for each kind (to add multiple entries for the same kind would require kind-dependent merging).