Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AMDGPU] stop buffer_store being moved illegally
Summary: The machine instruction scheduler was illegally moving a buffer store past a buffer load with the same descriptor and offset. Fixed by marking buffer ops as mayAlias and isAliased. This may be overly conservative, and we may need to revisit.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D43332
Change-Id: Iff3173d9e0653e830474546276ab9d30318b8ef7
llvm-svn: 325567
- Loading branch information
Tim Renouf
committed
Feb 20, 2018
1 parent
b8bf5a6
commit 8234b48
Showing
2 changed files
with
55 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s | ||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s | ||
|
||
; The buffer_loads and buffer_stores all access the same location. Check they do | ||
; not get reordered by the scheduler. | ||
|
||
; GCN-LABEL: {{^}}_amdgpu_cs_main: | ||
; GCN: buffer_load_dword | ||
; GCN: buffer_store_dword | ||
; GCN: buffer_load_dword | ||
; GCN: buffer_store_dword | ||
; GCN: buffer_load_dword | ||
; GCN: buffer_store_dword | ||
; GCN: buffer_load_dword | ||
; GCN: buffer_store_dword | ||
|
||
; Function Attrs: nounwind | ||
; Test body: five consecutive buffer load / buffer store rounds. Every round | ||
; reloads its <4 x i32> descriptor from the same pointer (%tmp12) and passes | ||
; the same i32 operand (%tmp11) to the intrinsic, so each store is followed by | ||
; a load that uses an identical descriptor address and operand. | ||
; | ||
; Round 1 stores the loaded float back unchanged and uses a separately | ||
; reloaded descriptor (%tmp17) for the store. Rounds 2-5 add 1.0 to the loaded | ||
; float and reuse the descriptor value that was loaded for that round's load. | ||
; | ||
; NOTE(review): the call sites reference attribute group #0, which is not | ||
; defined in the visible part of this file - confirm this is intended. | ||
; NOTE(review): there are five load/store pairs here, while the check lines | ||
; ahead of this function list four pairs - confirm this is intended. | ||
define amdgpu_cs void @_amdgpu_cs_main(<3 x i32> inreg %arg3, <3 x i32> %arg5) { | ||
.entry: | ||
; %tmp11 = (element 0 of %arg3 + %arg5) << 2; passed unchanged to every | ||
; buffer intrinsic call below. | ||
%tmp9 = add <3 x i32> %arg3, %arg5 | ||
%tmp10 = extractelement <3 x i32> %tmp9, i32 0 | ||
%tmp11 = shl i32 %tmp10, 2 | ||
; The descriptor pointer is built from undef; presumably its actual value is | ||
; irrelevant to what the test checks - TODO confirm. | ||
%tmp12 = inttoptr i64 undef to <4 x i32> addrspace(4)* | ||
; Round 1: load, then store the same value back via a reloaded descriptor. | ||
%tmp13 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp12, align 16 | ||
%tmp14 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %tmp13, i32 0, i32 %tmp11, i1 false, i1 false) #0 | ||
%tmp17 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp12, align 16 | ||
call void @llvm.amdgcn.buffer.store.f32(float %tmp14, <4 x i32> %tmp17, i32 0, i32 %tmp11, i1 false, i1 false) #0 | ||
; Round 2: load, add 1.0, store with the same descriptor value. | ||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp12, align 16 | ||
%tmp21 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %tmp20, i32 0, i32 %tmp11, i1 false, i1 false) #0 | ||
%tmp22 = fadd reassoc nnan arcp contract float %tmp21, 1.000000e+00 | ||
call void @llvm.amdgcn.buffer.store.f32(float %tmp22, <4 x i32> %tmp20, i32 0, i32 %tmp11, i1 false, i1 false) #0 | ||
; Round 3: same pattern as round 2. | ||
%tmp25 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp12, align 16 | ||
%tmp26 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %tmp25, i32 0, i32 %tmp11, i1 false, i1 false) #0 | ||
%tmp27 = fadd reassoc nnan arcp contract float %tmp26, 1.000000e+00 | ||
call void @llvm.amdgcn.buffer.store.f32(float %tmp27, <4 x i32> %tmp25, i32 0, i32 %tmp11, i1 false, i1 false) #0 | ||
; Round 4: same pattern as round 2. | ||
%tmp30 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp12, align 16 | ||
%tmp31 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %tmp30, i32 0, i32 %tmp11, i1 false, i1 false) #0 | ||
%tmp32 = fadd reassoc nnan arcp contract float %tmp31, 1.000000e+00 | ||
call void @llvm.amdgcn.buffer.store.f32(float %tmp32, <4 x i32> %tmp30, i32 0, i32 %tmp11, i1 false, i1 false) #0 | ||
; Round 5: same pattern as round 2. | ||
%tmp35 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp12, align 16 | ||
%tmp36 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %tmp35, i32 0, i32 %tmp11, i1 false, i1 false) #0 | ||
%tmp37 = fadd reassoc nnan arcp contract float %tmp36, 1.000000e+00 | ||
call void @llvm.amdgcn.buffer.store.f32(float %tmp37, <4 x i32> %tmp35, i32 0, i32 %tmp11, i1 false, i1 false) #0 | ||
ret void | ||
} | ||
|
||
; Buffer load intrinsic used by the test; declared with attribute group #2. | ||
declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #2 | ||
|
||
; Buffer store intrinsic used by the test; declared with attribute group #3. | ||
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #3 | ||
|
||
; #2 marks the load declaration nounwind + readonly; #3 marks the store | ||
; declaration nounwind + writeonly. | ||
attributes #2 = { nounwind readonly } | ||
attributes #3 = { nounwind writeonly } | ||
|