Skip to content

Commit

Permalink
[AAPointerInfo] OffsetInfo: Unassigned is distinct from Unknown
Browse files Browse the repository at this point in the history
A User like the PHINode may be visited multiple times for the same pointer along
different def-use edges. The uninitialized state of OffsetInfo at the first
visit needs to be distinct from the Unknown value that may be assigned after
processing the PHINode. Without that, a PHINode with all inputs Unknown is never
followed to its uses. This results in incorrect optimization because some
interfering accessess are missed.

Differential Revision: https://reviews.llvm.org/D134704
  • Loading branch information
ssahasra committed Sep 28, 2022
1 parent 0fb2676 commit 3f078b3
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 7 deletions.
5 changes: 3 additions & 2 deletions llvm/include/llvm/Transforms/IPO/Attributor.h
Original file line number Diff line number Diff line change
Expand Up @@ -5067,8 +5067,9 @@ struct AAPointerInfo : public AbstractAttribute {
OAS.getOffset() < getOffset() + getSize();
}

/// Constant used to represent unknown offset or sizes.
static constexpr int64_t Unknown = 1 << 31;
/// Constants used to represent special offsets or sizes.
static constexpr int64_t Unassigned = -1;
static constexpr int64_t Unknown = -2;
};

/// Call \p CB on all accesses that might interfere with \p OAS and return
Expand Down
23 changes: 18 additions & 5 deletions llvm/lib/Transforms/IPO/AttributorAttributes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1226,7 +1226,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {

/// Helper struct, will support ranges eventually.
struct OffsetInfo {
int64_t Offset = OffsetAndSize::Unknown;
int64_t Offset = OffsetAndSize::Unassigned;

bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; }
};
Expand All @@ -1243,6 +1243,8 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {

auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo PtrOI,
bool &Follow) {
assert(PtrOI.Offset != OffsetAndSize::Unassigned &&
"Cannot pass through if the input Ptr was not visited!");
OffsetInfo &UsrOI = OffsetInfoMap[Usr];
UsrOI = PtrOI;
Follow = true;
Expand Down Expand Up @@ -1283,11 +1285,15 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
APInt GEPOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
if (PtrOI.Offset == OffsetAndSize::Unknown ||
!GEP->accumulateConstantOffset(DL, GEPOffset)) {
LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset not constant "
<< *GEP << "\n");
UsrOI.Offset = OffsetAndSize::Unknown;
Follow = true;
return true;
}

LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset is constant " << *GEP
<< "\n");
UsrOI.Offset = PtrOI.Offset + GEPOffset.getZExtValue();
Follow = true;
return true;
Expand All @@ -1306,15 +1312,22 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
bool IsFirstPHIUser = !OffsetInfoMap.count(Usr);
OffsetInfo &UsrOI = OffsetInfoMap[Usr];
OffsetInfo &PtrOI = OffsetInfoMap[CurPtr];
// Check if the PHI is invariant (so far).
if (UsrOI == PtrOI)
return true;

// Check if the PHI operand has already an unknown offset as we can't
// improve on that anymore.
if (PtrOI.Offset == OffsetAndSize::Unknown) {
LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand offset unknown "
<< *CurPtr << " in " << *Usr << "\n");
Follow = UsrOI.Offset != OffsetAndSize::Unknown;
UsrOI = PtrOI;
Follow = true;
return true;
}

// Check if the PHI is invariant (so far).
if (UsrOI == PtrOI) {
assert(PtrOI.Offset != OffsetAndSize::Unassigned &&
"Cannot assign if the current Ptr was not visited!");
LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI is invariant (so far)");
return true;
}

Expand Down
42 changes: 42 additions & 0 deletions llvm/test/CodeGen/AMDGPU/attributor.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
; RUN: llc < %s | FileCheck %s

target triple = "amdgcn-amd-amdhsa"

; The call to intrinsic implicitarg_ptr reaches a load through a phi. The
; offsets of the phi cannot be determined, and hence the attirbutor assumes that
; hostcall is in use.

; CHECK: .value_kind: hidden_hostcall_buffer
; CHECK: .value_kind: hidden_multigrid_sync_arg

define amdgpu_kernel void @the_kernel(i32 addrspace(1)* %a, i64 %index1, i64 %index2, i1 %cond) {
entry:
%tmp7 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
br i1 %cond, label %old, label %new

old: ; preds = %entry
%tmp4 = getelementptr i8, i8 addrspace(4)* %tmp7, i64 %index1
br label %join

new: ; preds = %entry
%tmp12 = getelementptr inbounds i8, i8 addrspace(4)* %tmp7, i64 %index2
br label %join

join: ; preds = %new, %old
%.in.in.in = phi i8 addrspace(4)* [ %tmp12, %new ], [ %tmp4, %old ]
%.in.in = bitcast i8 addrspace(4)* %.in.in.in to i16 addrspace(4)*

;;; THIS USE is where the offset into implicitarg_ptr is unknown
%.in = load i16, i16 addrspace(4)* %.in.in, align 2

%idx.ext = sext i16 %.in to i64
%add.ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.ext
%tmp16 = atomicrmw add i32 addrspace(1)* %add.ptr3, i32 15 syncscope("agent-one-as") monotonic, align 4
ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()

declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()

declare i32 @llvm.amdgcn.workgroup.id.x()
65 changes: 65 additions & 0 deletions llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1616,6 +1616,71 @@ entry:
ret i32 %add1
}

define i8 @local_alloca_not_simplifiable_2(i64 %index1, i64 %index2, i1 %cnd) {
; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn
; TUNIT-LABEL: define {{[^@]+}}@local_alloca_not_simplifiable_2
; TUNIT-SAME: (i64 [[INDEX1:%.*]], i64 [[INDEX2:%.*]], i1 [[CND:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: entry:
; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
; TUNIT-NEXT: [[GEP0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0
; TUNIT-NEXT: store i8 7, i8* [[GEP0]], align 16
; TUNIT-NEXT: br i1 [[CND]], label [[LEFT:%.*]], label [[RIGHT:%.*]]
; TUNIT: left:
; TUNIT-NEXT: [[GEP1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[INDEX1]]
; TUNIT-NEXT: br label [[JOIN:%.*]]
; TUNIT: right:
; TUNIT-NEXT: [[GEP2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[INDEX2]]
; TUNIT-NEXT: br label [[JOIN]]
; TUNIT: join:
; TUNIT-NEXT: [[GEP_JOIN:%.*]] = phi i8* [ [[GEP1]], [[LEFT]] ], [ [[GEP2]], [[RIGHT]] ]
; TUNIT-NEXT: store i8 9, i8* [[GEP_JOIN]], align 4
; TUNIT-NEXT: [[I:%.*]] = load i8, i8* [[GEP0]], align 16
; TUNIT-NEXT: ret i8 [[I]]
;
; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn
; CGSCC-LABEL: define {{[^@]+}}@local_alloca_not_simplifiable_2
; CGSCC-SAME: (i64 [[INDEX1:%.*]], i64 [[INDEX2:%.*]], i1 [[CND:%.*]]) #[[ATTR5]] {
; CGSCC-NEXT: entry:
; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
; CGSCC-NEXT: [[GEP0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0
; CGSCC-NEXT: store i8 7, i8* [[GEP0]], align 16
; CGSCC-NEXT: br i1 [[CND]], label [[LEFT:%.*]], label [[RIGHT:%.*]]
; CGSCC: left:
; CGSCC-NEXT: [[GEP1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[INDEX1]]
; CGSCC-NEXT: br label [[JOIN:%.*]]
; CGSCC: right:
; CGSCC-NEXT: [[GEP2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[INDEX2]]
; CGSCC-NEXT: br label [[JOIN]]
; CGSCC: join:
; CGSCC-NEXT: [[GEP_JOIN:%.*]] = phi i8* [ [[GEP1]], [[LEFT]] ], [ [[GEP2]], [[RIGHT]] ]
; CGSCC-NEXT: store i8 9, i8* [[GEP_JOIN]], align 4
; CGSCC-NEXT: [[I:%.*]] = load i8, i8* [[GEP0]], align 16
; CGSCC-NEXT: ret i8 [[I]]
;
entry:
%Bytes = alloca [1024 x i8], align 16
%gep0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 0
store i8 7, i8* %gep0, align 4
br i1 %cnd, label %left, label %right

left: ; preds = %entry
%gep1 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %index1
br label %join

right: ; preds = %entry
%gep2 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %index2
br label %join

join: ; preds = %right, %left
%gep.join = phi i8* [ %gep1, %left ], [ %gep2, %right ]
store i8 9, i8* %gep.join, align 4

; This load cannot be replaced by the value 7 from %entry, since the previous
; store interferes due to its unknown offset.
%i = load i8, i8* %gep0, align 4
ret i8 %i
}

; We could simplify these if we separate accessed bins wrt. alignment (here mod 4).
define i32 @unknown_access_mixed_simplifiable(i32 %arg1, i32 %arg2) {
; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
Expand Down

0 comments on commit 3f078b3

Please sign in to comment.