Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AMDGPU] Correctly merge alias.scope and noalias metadata for memops
When adding alias.scope and noalias metadata to a memcpy call, the alias.scope and noalias metadata from the operands are merged. The rule for merging alias.scope is to take the intersection of the domains and the union of the scopes within those domains. The rule for merging noalias is to take the intersection. The bug is that AMDGPULowerModuleLDS was using concatenation for both alias.scope and noalias. For example, suppose f1 and f2 are added to the LDS structure and there is a memcpy(f2, f1, sizeof(f1)). Concatenation then creates noalias metadata for the memcpy that includes both {f1, f2}. That means that the memcpy is assumed not to alias a prior load of f2, which enables the optimizer to remove a load of f2 that occurs after the memcpy. The function MDNode::getMostGenericAliasScope defines the semantics for alias.scope. There is a function, combineMetadata in Local.cpp, that uses intersection for noalias. Differential Revision: https://reviews.llvm.org/D110049
- Loading branch information
Showing
3 changed files
with
61 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
; RUN: llc -march=amdgcn -mcpu=gfx900 -O3 < %s | FileCheck -check-prefix=GCN %s | ||
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s | ||
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s | ||
|
||
%vec_type = type { %vec_base } | ||
%vec_base = type { %union.anon } | ||
%union.anon = type { %"vec_base<char, 3>::n_vec_" } | ||
%"vec_base<char, 3>::n_vec_" = type { [3 x i8] } | ||
|
||
$_f1 = comdat any | ||
$_f2 = comdat any | ||
@_f1 = linkonce_odr hidden local_unnamed_addr addrspace(3) global %vec_type undef, comdat, align 1 | ||
@_f2 = linkonce_odr hidden local_unnamed_addr addrspace(3) global %vec_type undef, comdat, align 1 | ||
|
||
; GCN-LABEL: @test | ||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1 | ||
; GCN-NEXT: global_store_byte v{{[0-9]+}}, [[REG]] | ||
|
||
; CHECK-LABEL: @test | ||
; CHECK: store i8 3, i8 addrspace(3)* %0, align 4, !alias.scope !0, !noalias !3 | ||
; CHECK: tail call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* noundef align 1 dereferenceable(3) %2, i8 addrspace(3)* noundef align 1 dereferenceable(3) %1, i64 3, i1 false), !alias.scope !6, !noalias !7 | ||
; CHECK: %4 = load i8, i8 addrspace(3)* %3, align 4, !alias.scope !8, !noalias !9 | ||
; CHECK: tail call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* noundef align 1 dereferenceable(3) %7, i8 addrspace(3)* noundef align 1 dereferenceable(3) %6, i64 3, i1 false), !alias.scope !6, !noalias !7 | ||
; CHECK: %9 = load i8, i8 addrspace(3)* %8, align 4, !alias.scope !8, !noalias !9 | ||
|
||
define protected amdgpu_kernel void @test(i8 addrspace(1)* nocapture %ptr.coerce) local_unnamed_addr #0 { | ||
entry: | ||
store i8 3, i8 addrspace(3)* getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), align 1 | ||
tail call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* noundef align 1 dereferenceable(3) getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), i8 addrspace(3)* noundef align 1 dereferenceable(3) getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), i64 3, i1 false) | ||
%0 = load i8, i8 addrspace(3)* getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), align 1 | ||
%cmp.i.i = icmp eq i8 %0, 3 | ||
store i8 2, i8 addrspace(3)* getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), align 1 | ||
tail call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* noundef align 1 dereferenceable(3) getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), i8 addrspace(3)* noundef align 1 dereferenceable(3) getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), i64 3, i1 false) | ||
%1 = load i8, i8 addrspace(3)* getelementptr inbounds (%vec_type, %vec_type addrspace(3)* @_f2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0), align 1 | ||
%cmp.i.i19 = icmp eq i8 %1, 2 | ||
%2 = and i1 %cmp.i.i19, %cmp.i.i | ||
%frombool8 = zext i1 %2 to i8 | ||
store i8 %frombool8, i8 addrspace(1)* %ptr.coerce, align 1 | ||
ret void | ||
} | ||
|
||
declare void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* noalias nocapture writeonly, i8 addrspace(3)* noalias nocapture readonly, i64, i1 immarg) #1 | ||
|
||
; CHECK:!0 = !{!1} | ||
; CHECK:!1 = distinct !{!1, !2} | ||
; CHECK:!2 = distinct !{!2} | ||
; CHECK:!3 = !{!4, !5} | ||
; CHECK:!4 = distinct !{!4, !2} | ||
; CHECK:!5 = distinct !{!5, !2} | ||
; CHECK:!6 = !{!5, !1} | ||
; CHECK:!7 = !{!4} | ||
; CHECK:!8 = !{!5} | ||
; CHECK:!9 = !{!1, !4} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters