R600/SI: Use same complex patterns for DS atomics
This fixes hitting the same negative base offset problem
that was already fixed for regular loads and stores.

llvm-svn: 217256
arsenm committed Sep 5, 2014
1 parent 1fcea42 commit 8ae5961
Showing 3 changed files with 90 additions and 68 deletions.
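Note on the pattern change below: the new TableGen classes select their address through DS1Addr1Offset instead of matching (add ptr, imm) directly. DS1Addr1Offset is not defined in this diff; as a rough sketch (the declaration form and selector name here are assumptions, not copied from this commit), it is a SelectionDAG ComplexPattern along these lines:

// Sketch only -- the real declaration lives elsewhere in the R600/SI backend,
// not in this diff. A ComplexPattern hands address matching to C++ selection
// code, which splits a DS address into a base pointer and a 16-bit offset and
// only folds the constant when that is safe for the subtarget (in particular,
// not when the base could be negative on SI). Centralizing that check is what
// fixes the offset problem for atomics, as it already did for DS loads/stores.
def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;

The added tests below pin down the observable effect: on SI the constant stays out of the instruction offset field (0x0), while on CI it is folded (0x10).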
114 changes: 47 additions & 67 deletions llvm/lib/Target/R600/SIInstructions.td
@@ -2572,17 +2572,10 @@ def : Pat <
(EXTRACT_SUBREG $value, sub1), $offset0, $offset1)
>;

-multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
-  def : Pat <
-    (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$value),
-    (inst (i1 0), $ptr, $value, (as_i16imm $offset))
-  >;
-
-  def : Pat <
-    (frag i32:$ptr, vt:$val),
-    (inst 0, $ptr, $val, 0)
-  >;
-}
+class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
+  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
+  (inst (i1 0), $ptr, $value, (as_i16imm $offset))
+>;

// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
//
@@ -2594,69 +2587,56 @@ multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
// We also load this -1 with s_mov_b32 / s_mov_b64 even though this
// needs to be a VGPR. The SGPR copy pass will fix this, and it's
// easier since there is no v_mov_b64.
-multiclass DSAtomicIncRetPat<DS inst, ValueType vt,
-                             Instruction LoadImm, PatFrag frag> {
-  def : Pat <
-    (frag (add i32:$ptr, (i32 IMM16bit:$offset)), (vt 1)),
-    (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
-  >;
-
-  def : Pat <
-    (frag i32:$ptr, (vt 1)),
-    (inst 0, $ptr, (LoadImm (vt -1)), 0)
-  >;
-}
+class DSAtomicIncRetPat<DS inst, ValueType vt,
+                        Instruction LoadImm, PatFrag frag> : Pat <
+  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
+  (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
+>;

-multiclass DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> {
-  def : Pat <
-    (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$cmp, vt:$swap),
-    (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
-  >;
-
-  def : Pat <
-    (frag i32:$ptr, vt:$cmp, vt:$swap),
-    (inst 0, $ptr, $cmp, $swap, 0)
-  >;
-}
+class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
+  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
+  (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
+>;


// 32-bit atomics.
-defm : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
-                         S_MOV_B32, atomic_load_add_local>;
-defm : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
-                         S_MOV_B32, atomic_load_sub_local>;

-defm : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
-defm : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
-defm : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
-defm : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
-defm : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
-defm : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;

-defm : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
+def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
+                        S_MOV_B32, atomic_load_add_local>;
+def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
+                        S_MOV_B32, atomic_load_sub_local>;

+def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;

+def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;

// 64-bit atomics.
-defm : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
-                         S_MOV_B64, atomic_load_add_local>;
-defm : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
-                         S_MOV_B64, atomic_load_sub_local>;

-defm : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
-defm : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
-defm : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
-defm : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
-defm : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
-defm : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;

-defm : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
+def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
+                        S_MOV_B64, atomic_load_add_local>;
+def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
+                        S_MOV_B64, atomic_load_sub_local>;

+def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;

+def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;


//===----------------------------------------------------------------------===//
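For comparison with the commit message's reference to regular loads and stores: those DS access patterns already select their offsets through the same complex pattern, and the atomic classes above now mirror their shape. A hedged sketch of what the corresponding load pattern plausibly looks like (the DSReadPat name and exact operand list are assumptions inferred from the atomic patterns in this diff, not copied from the source):

// Assumed shape of the pre-existing DS load pattern class. The point of the
// parallel: both the load and the atomic patterns ask DS1Addr1Offset for a
// ($ptr, $offset) pair, so whether the offset may be folded is decided in one
// selector rather than separately in every pattern.
class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
  (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
  (inst (i1 0), $ptr, (as_i16imm $offset))
>;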
15 changes: 15 additions & 0 deletions llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s

; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_offset:
; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
@@ -35,3 +36,17 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}

+; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_bad_si_offset
+; SI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
+; CI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
+  %sub = sub i32 %a, %b
+  %add = add i32 %sub, 4
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+  %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
+  %result = extractvalue { i32, i1 } %pair, 0
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
29 changes: 28 additions & 1 deletion llvm/test/CodeGen/R600/local-atomics.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s

; FUNC-LABEL: @lds_atomic_xchg_ret_i32:
; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
@@ -47,6 +48,19 @@ define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
ret void
}

+; FUNC-LABEL: @lds_atomic_add_ret_i32_bad_si_offset
+; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
+; CI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+  %sub = sub i32 %a, %b
+  %add = add i32 %sub, 4
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}

; FUNC-LABEL: @lds_atomic_inc_ret_i32:
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
@@ -70,6 +84,19 @@ define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
ret void
}

+; FUNC-LABEL: @lds_atomic_inc_ret_i32_bad_si_offset:
+; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
+; CI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+  %sub = sub i32 %a, %b
+  %add = add i32 %sub, 4
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}

; FUNC-LABEL: @lds_atomic_sub_ret_i32:
; SI: DS_SUB_RTN_U32
; SI: S_ENDPGM
