Skip to content

Commit

Permalink
Add IntrWrite[Arg]Mem intrinsic property
Browse files Browse the repository at this point in the history
Summary:
This property is used to mark an intrinsic that only writes to memory, but
neither reads from memory nor has other side effects.

An example where this is useful is the llvm.amdgcn.buffer.store.format.*
intrinsic, which corresponds to a store instruction that goes through a special
buffer descriptor rather than through a plain pointer.

With this property, the intrinsic should still be handled as having side
effects at the LLVM IR level, but machine scheduling can make smarter
decisions.

Reviewers: tstellarAMD, arsenm, joker.eph, reames

Subscribers: arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D18291

llvm-svn: 266826
  • Loading branch information
nhaehnle committed Apr 19, 2016
1 parent e2dda4f commit b48275f
Show file tree
Hide file tree
Showing 9 changed files with 72 additions and 41 deletions.
11 changes: 11 additions & 0 deletions llvm/include/llvm/IR/Intrinsics.td
Expand Up @@ -38,6 +38,17 @@ def IntrReadArgMem : IntrinsicProperty;
// deleted if dead.
def IntrReadMem : IntrinsicProperty;

// IntrWriteMem - This intrinsic writes to unspecified memory, but does not
// read from memory, and has no other side effects. This means dead stores
// before calls to this intrinsic may be removed.
def IntrWriteMem : IntrinsicProperty;

// IntrWriteArgMem - This intrinsic writes only to memory that one of its
// arguments points to, but may access an unspecified amount. The intrinsic
// does not read from memory and has no other side effects. This means that
// dead stores before calls to this intrinsic may be removed.
def IntrWriteArgMem : IntrinsicProperty;

// IntrReadWriteArgMem - This intrinsic reads and writes only from memory that
// one of its arguments points to, but may access an unspecified amount. The
// reads and writes may be volatile, but except for this it has no other side
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Expand Up @@ -243,7 +243,7 @@ class AMDGPUBufferStore : Intrinsic <
llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm)
[]>;
[IntrWriteMem]>;
def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
def int_amdgcn_buffer_store : AMDGPUBufferStore;

Expand Down
35 changes: 12 additions & 23 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Expand Up @@ -949,23 +949,18 @@ defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Load_Helper <
defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <
mubuf<0x03>, "buffer_load_format_xyzw", VReg_128
>;
// Without mayLoad and hasSideEffects, TableGen complains about the pattern
// matching llvm.amdgcn.buffer.store.format. Eventually, we'll need a way
// to express the effects of the intrinsic more precisely.
let mayLoad = 1, hasSideEffects = 1 in {
defm BUFFER_STORE_FORMAT_X : MUBUF_Store_Helper <
mubuf<0x04>, "buffer_store_format_x", VGPR_32
>;
defm BUFFER_STORE_FORMAT_XY : MUBUF_Store_Helper <
mubuf<0x05>, "buffer_store_format_xy", VReg_64
>;
defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Store_Helper <
mubuf<0x06>, "buffer_store_format_xyz", VReg_96
>;
defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Store_Helper <
mubuf<0x07>, "buffer_store_format_xyzw", VReg_128
>;
}
defm BUFFER_STORE_FORMAT_X : MUBUF_Store_Helper <
mubuf<0x04>, "buffer_store_format_x", VGPR_32
>;
defm BUFFER_STORE_FORMAT_XY : MUBUF_Store_Helper <
mubuf<0x05>, "buffer_store_format_xy", VReg_64
>;
defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Store_Helper <
mubuf<0x06>, "buffer_store_format_xyz", VReg_96
>;
defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Store_Helper <
mubuf<0x07>, "buffer_store_format_xyzw", VReg_128
>;
defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
mubuf<0x08, 0x10>, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
>;
Expand Down Expand Up @@ -996,11 +991,6 @@ defm BUFFER_STORE_SHORT : MUBUF_Store_Helper <
mubuf<0x1a>, "buffer_store_short", VGPR_32, i32, truncstorei16_global
>;

// Without mayLoad and hasSideEffects, TableGen complains about the pattern
// matching llvm.amdgcn.buffer.store. Eventually, we'll want a WriteOnly
// property to express the effects of this intrinsic more precisely, see
// http://reviews.llvm.org/D18291
let mayLoad = 1, hasSideEffects = 1 in {
defm BUFFER_STORE_DWORD : MUBUF_Store_Helper <
mubuf<0x1c>, "buffer_store_dword", VGPR_32, i32, global_store
>;
Expand All @@ -1012,7 +1002,6 @@ defm BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
mubuf<0x1e, 0x1f>, "buffer_store_dwordx4", VReg_128, v4i32, global_store
>;
}

defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic <
mubuf<0x30, 0x40>, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
Expand Up @@ -16,11 +16,11 @@ define void @stored_fi_to_lds(float* addrspace(3)* %ptr) #0 {

; Offset is applied
; GCN-LABEL: {{^}}stored_fi_to_lds_2_small_objects:
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:4{{$}}
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:4{{$}}

; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
; GCN-DAG: s_load_dword [[LDSPTR:s[0-9]+]]

; GCN-DAG: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO]]
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
Expand Up @@ -11,8 +11,8 @@ declare void @llvm.amdgcn.s.barrier() #1

; FUNC-LABEL: @reorder_local_load_global_store_local_load
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
; CI-NEXT: buffer_store_dword
; CI-NEXT: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
; CI: buffer_store_dword
define void @reorder_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
%ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4

Expand Down Expand Up @@ -71,9 +71,9 @@ define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace
}

; FUNC-LABEL: @reorder_constant_load_global_store_constant_load
; CI: buffer_store_dword
; CI: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
; CI: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
; CI-DAG: buffer_store_dword
; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x1
; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x2
; CI: buffer_store_dword
Expand Down Expand Up @@ -184,11 +184,11 @@ define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspa
}

; FUNC-LABEL: @reorder_global_offsets
; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
; CI: buffer_store_dword
; CI: s_endpgm
Expand Down
6 changes: 3 additions & 3 deletions llvm/utils/TableGen/CodeGenDAGPatterns.cpp
Expand Up @@ -2816,14 +2816,14 @@ class InstAnalyzer {

if (const CodeGenIntrinsic *IntInfo = N->getIntrinsicInfo(CDP)) {
// If this is an intrinsic, analyze it.
if (IntInfo->ModRef >= CodeGenIntrinsic::ReadArgMem)
if (IntInfo->ModRef & CodeGenIntrinsic::MR_Ref)
mayLoad = true;// These may load memory.

if (IntInfo->ModRef >= CodeGenIntrinsic::ReadWriteArgMem)
if (IntInfo->ModRef & CodeGenIntrinsic::MR_Mod)
mayStore = true;// Intrinsics that can write to memory are 'mayStore'.

if (IntInfo->ModRef >= CodeGenIntrinsic::ReadWriteMem)
// WriteMem intrinsics can have other strange effects.
// ReadWriteMem intrinsics can have other strange effects.
hasSideEffects = true;
}
}
Expand Down
33 changes: 29 additions & 4 deletions llvm/utils/TableGen/CodeGenIntrinsics.h
Expand Up @@ -59,11 +59,36 @@ namespace llvm {

IntrinsicSignature IS;

// Memory mod/ref behavior of this intrinsic.
enum ModRefKind {
NoMem, ReadArgMem, ReadMem, ReadWriteArgMem, ReadWriteMem
/// Independent bit flags that classify an intrinsic's memory accesses by
/// kind (read / write) and by location (argument-pointed vs. anywhere).
/// Analogous to \c FunctionModRefBehavior.
///
/// The numeric values are significant: the combined behaviours built from
/// these bits are compared and ordered as integers elsewhere, so each flag
/// must keep its exact value.
enum ModRefBits {
/// Set when the access is not confined to memory reachable through the
/// intrinsic's pointer arguments.
MR_Anywhere = 1 << 0,

/// Set when the intrinsic may read memory.
MR_Ref = 1 << 1,

/// Set when the intrinsic may write memory.
MR_Mod = 1 << 2,

/// Convenience mask: the intrinsic may read as well as write memory.
MR_ModRef = MR_Mod | MR_Ref,
};
ModRefKind ModRef;

/// The complete mod/ref classification of an intrinsic. Each enumerator
/// mirrors one intrinsic property (IntrReadMem, IntrReadArgMem, etc.) and is
/// spelled as a combination of \c ModRefBits flags, so individual aspects can
/// be tested with a bitwise AND against MR_Ref, MR_Mod or MR_Anywhere.
enum ModRefBehavior {
/// No memory access at all.
NoMem = 0,
/// Reads only, without the MR_Anywhere bit.
ReadArgMem = MR_Ref,
/// Reads only, possibly from anywhere.
ReadMem = MR_Anywhere | MR_Ref,
/// Writes only, without the MR_Anywhere bit.
WriteArgMem = MR_Mod,
/// Writes only, possibly to anywhere.
WriteMem = MR_Anywhere | MR_Mod,
/// Reads and writes, without the MR_Anywhere bit.
ReadWriteArgMem = MR_ModRef,
/// Reads and writes, possibly anywhere; this is the largest value.
ReadWriteMem = MR_Anywhere | MR_ModRef,
};
ModRefBehavior ModRef;

/// This is set to true if the intrinsic is overloaded by its argument
/// types.
Expand Down
4 changes: 4 additions & 0 deletions llvm/utils/TableGen/CodeGenTarget.cpp
Expand Up @@ -577,6 +577,10 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
ModRef = ReadArgMem;
else if (Property->getName() == "IntrReadMem")
ModRef = ReadMem;
else if (Property->getName() == "IntrWriteMem")
ModRef = WriteMem;
else if (Property->getName() == "IntrWriteArgMem")
ModRef = WriteArgMem;
else if (Property->getName() == "IntrReadWriteArgMem")
ModRef = ReadWriteArgMem;
else if (Property->getName() == "Commutative")
Expand Down
6 changes: 4 additions & 2 deletions llvm/utils/TableGen/IntrinsicEmitter.cpp
Expand Up @@ -462,8 +462,8 @@ struct AttributeComparator {
return R->isConvergent;

// Try to order by readonly/readnone attribute.
CodeGenIntrinsic::ModRefKind LK = L->ModRef;
CodeGenIntrinsic::ModRefKind RK = R->ModRef;
CodeGenIntrinsic::ModRefBehavior LK = L->ModRef;
CodeGenIntrinsic::ModRefBehavior RK = R->ModRef;
if (LK != RK) return (LK > RK);

// Order by argument attributes.
Expand Down Expand Up @@ -616,11 +616,13 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
OS << ",";
OS << "Attribute::ReadOnly";
break;
case CodeGenIntrinsic::WriteArgMem:
case CodeGenIntrinsic::ReadWriteArgMem:
if (addComma)
OS << ",";
OS << "Attribute::ArgMemOnly";
break;
case CodeGenIntrinsic::WriteMem:
case CodeGenIntrinsic::ReadWriteMem:
break;
}
Expand Down

0 comments on commit b48275f

Please sign in to comment.