-
Notifications
You must be signed in to change notification settings - Fork 11.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[LoadStoreVectorizer] Only upgrade align for alloca
In commit 2be0abb (D149893) the load store vectorizer was reimplemented. One thing that can happen with the new LSV is that it can increase the alignment of both alloca and global objects. However, the code comments indicate that the intention was only to increase the alignment of allocas. Now we use stripPointerCasts to analyse whether the load/store really is accessing an alloca (the same approach getOrEnforceKnownAlignment uses), and we only try to change the alignment if we find an alloca instruction. This way the code matches the code comments better, and we won't change the alignment of non-stack variables to use the "StackAdjustedAlignment". Differential Revision: https://reviews.llvm.org/D152386
- Loading branch information
Showing
3 changed files
with
99 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
24 changes: 24 additions & 0 deletions
24
llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/dont-adjust-globalobj-alignment.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
; RUN: opt -S -passes=load-store-vectorizer --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck --match-full-lines %s | ||
|
||
target triple = "amdgcn--" | ||
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" | ||
|
||
@G = internal addrspace(5) global [8 x i16] undef, align 1 | ||
|
||
; Verify that the alignment of the global remains at 1, even if we vectorize | ||
; the stores. | ||
; | ||
; CHECK: @G = internal addrspace(5) global [8 x i16] undef, align 1 | ||
|
||
; Two i16 stores, each with align 1, to elements 0 and 1 of the global @G. | ||
; The stored-to pointers are derived from a global rather than from an alloca. | ||
; The expected output below is a single <2 x i16> store that still uses | ||
; align 1. The arguments %p and %r are not used by the body. | ||
define void @private_store_2xi16_align2_not_alloca(ptr addrspace(5) %p, ptr addrspace(5) %r) { | ||
; CHECK: define void @private_store_2xi16_align2_not_alloca(ptr addrspace(5) [[P:%.*]], ptr addrspace(5) [[R:%.*]]) #0 { | ||
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr i16, ptr addrspace(5) @G, i32 0 | ||
; CHECK-NEXT: store <2 x i16> <i16 1, i16 2>, ptr addrspace(5) [[GEP0]], align 1 | ||
; CHECK-NEXT: ret void | ||
; | ||
; Addresses of @G element 0 and element 1 (adjacent i16 slots). | ||
%gep0 = getelementptr i16, ptr addrspace(5) @G, i32 0 | ||
%gep1 = getelementptr i16, ptr addrspace(5) @G, i32 1 | ||
; The two scalar stores that are expected to be merged into one vector store. | ||
store i16 1, ptr addrspace(5) %gep0, align 1 | ||
store i16 2, ptr addrspace(5) %gep1, align 1 | ||
ret void | ||
} |