Skip to content

Commit

Permalink
[AMDGPU] Add llvm.amdgpu.update.dpp intrinsic
Browse files Browse the repository at this point in the history
Summary:
Now that we've made all the necessary backend changes, we can add a new
intrinsic which exposes the new capabilities to IR producers. Since
llvm.amdgpu.update.dpp is a strict superset of llvm.amdgpu.mov.dpp, we
should deprecate the former. We also add tests for all the functionality
that was added in previous changes, now that we can access it via an IR
construct.

Reviewers: tstellar, arsenm

Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye

Differential Revision: https://reviews.llvm.org/D34718

llvm-svn: 310399
  • Loading branch information
cwabbott0 committed Aug 8, 2017
1 parent 4d85259 commit 249fc7b
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 0 deletions.
9 changes: 9 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Expand Up @@ -788,6 +788,15 @@ def int_amdgcn_mov_dpp :
[LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i1_ty], [IntrNoMem, IntrConvergent]>;

// llvm.amdgcn.update.dpp.i32 <old> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
// Should be equivalent to:
// v_mov_b32 <dest> <old>
// v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
def int_amdgcn_update_dpp :
Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent]>;

def int_amdgcn_s_dcache_wb :
GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
Intrinsic<[], [], []>;
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Expand Up @@ -663,6 +663,14 @@ def : Pat <
(as_i1imm $bound_ctrl))
>;

def : Pat <
(i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
imm:$bank_mask, imm:$bound_ctrl)),
(V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl),
(as_i32imm $row_mask), (as_i32imm $bank_mask),
(as_i1imm $bound_ctrl))
>;

def : Pat<
(i32 (anyext i16:$src)),
(COPY $src)
Expand Down
17 changes: 17 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
@@ -0,0 +1,17 @@
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-OPT %s
; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOOPT %s

; VI-LABEL: {{^}}dpp_test:
; VI: v_mov_b32_e32 v0, s{{[0-9]+}}
; VI: v_mov_b32_e32 v1, s{{[0-9]+}}
; VI: s_nop 1
; VI: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x01,0x08,0x11]
define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 1) #0
store i32 %tmp0, i32 addrspace(1)* %out
ret void
}

declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0

attributes #0 = { nounwind readnone convergent }

0 comments on commit 249fc7b

Please sign in to comment.