Skip to content

Commit

Permalink
[AMDGPU] More GFX11 coverage for tests with generated checks
Browse files Browse the repository at this point in the history
  • Loading branch information
jayfoad committed Jul 8, 2022
1 parent 03af9ba commit de3b5d7
Show file tree
Hide file tree
Showing 7 changed files with 3,607 additions and 2,120 deletions.
5,123 changes: 3,202 additions & 1,921 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll

Large diffs are not rendered by default.

193 changes: 131 additions & 62 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll
@@ -1,97 +1,166 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX10
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX10
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX11

define i32 @global_atomic_csub(i32 addrspace(1)* %ptr, i32 %data) {
; GCN-LABEL: global_atomic_csub:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: global_atomic_csub:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_atomic_csub:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %data)
ret i32 %ret
}

define i32 @global_atomic_csub_offset(i32 addrspace(1)* %ptr, i32 %data) {
; GCN-LABEL: global_atomic_csub_offset:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_mov_b64 s[4:5], 0x1000
; GCN-NEXT: v_mov_b32_e32 v3, s4
; GCN-NEXT: v_mov_b32_e32 v4, s5
; GCN-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3
; GCN-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo
; GCN-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: global_atomic_csub_offset:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b64 s[4:5], 0x1000
; GFX10-NEXT: v_mov_b32_e32 v3, s4
; GFX10-NEXT: v_mov_b32_e32 v4, s5
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo
; GFX10-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_atomic_csub_offset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b64 s[0:1], 0x1000
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo
; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024
%ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data)
ret i32 %ret
}

define void @global_atomic_csub_nortn(i32 addrspace(1)* %ptr, i32 %data) {
; GCN-LABEL: global_atomic_csub_nortn:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: global_atomic_csub_nortn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_atomic_csub_nortn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %data)
ret void
}

define void @global_atomic_csub_offset_nortn(i32 addrspace(1)* %ptr, i32 %data) {
; GCN-LABEL: global_atomic_csub_offset_nortn:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_mov_b64 s[4:5], 0x1000
; GCN-NEXT: v_mov_b32_e32 v3, s4
; GCN-NEXT: v_mov_b32_e32 v4, s5
; GCN-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3
; GCN-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo
; GCN-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: global_atomic_csub_offset_nortn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b64 s[4:5], 0x1000
; GFX10-NEXT: v_mov_b32_e32 v3, s4
; GFX10-NEXT: v_mov_b32_e32 v4, s5
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo
; GFX10-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_atomic_csub_offset_nortn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b64 s[0:1], 0x1000
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo
; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024
%ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data)
ret void
}

define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset(i32 addrspace(1)* %ptr, i32 %data) {
; GCN-LABEL: global_atomic_csub_sgpr_base_offset:
; GCN: ; %bb.0:
; GCN-NEXT: s_clause 0x1
; GCN-NEXT: s_load_dword s2, s[4:5], 0x8
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GCN-NEXT: v_mov_b32_e32 v1, 0x1000
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s2
; GCN-NEXT: global_atomic_csub v0, v1, v0, s[0:1] glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: global_store_dword v[0:1], v0, off
; GCN-NEXT: s_endpgm
; GFX10-LABEL: global_atomic_csub_sgpr_base_offset:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, 0x1000
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: global_atomic_csub v0, v1, v0, s[0:1] glc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v[0:1], v0, off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: global_atomic_csub_sgpr_base_offset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s2
; GFX11-NEXT: global_atomic_csub_u32 v0, v1, v0, s[0:1] glc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024
%ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data)
store i32 %ret, i32 addrspace(1)* undef
ret void
}

define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset_nortn(i32 addrspace(1)* %ptr, i32 %data) {
; GCN-LABEL: global_atomic_csub_sgpr_base_offset_nortn:
; GCN: ; %bb.0:
; GCN-NEXT: s_clause 0x1
; GCN-NEXT: s_load_dword s2, s[4:5], 0x8
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GCN-NEXT: v_mov_b32_e32 v1, 0x1000
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s2
; GCN-NEXT: global_atomic_csub v0, v1, v0, s[0:1] glc
; GCN-NEXT: s_endpgm
; GFX10-LABEL: global_atomic_csub_sgpr_base_offset_nortn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, 0x1000
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: global_atomic_csub v0, v1, v0, s[0:1] glc
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: global_atomic_csub_sgpr_base_offset_nortn:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s2
; GFX11-NEXT: global_atomic_csub_u32 v0, v1, v0, s[0:1] glc
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024
%ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data)
ret void
Expand Down
49 changes: 36 additions & 13 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll
Expand Up @@ -2,7 +2,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,VI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

define double @v_trig_preop_f64(double %a, i32 %b) {
; GCN-LABEL: v_trig_preop_f64:
Expand All @@ -11,12 +12,12 @@ define double @v_trig_preop_f64(double %a, i32 %b) {
; GCN-NEXT: v_trig_preop_f64 v[0:1], v[0:1], v2
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_trig_preop_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_trig_preop_f64 v[0:1], v[0:1], v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_trig_preop_f64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_trig_preop_f64 v[0:1], v[0:1], v2
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b)
ret double %result
}
Expand All @@ -28,12 +29,12 @@ define double @v_trig_preop_f64_imm(double %a, i32 %b) {
; GCN-NEXT: v_trig_preop_f64 v[0:1], v[0:1], 7
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_trig_preop_f64_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_trig_preop_f64 v[0:1], v[0:1], 7
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS-LABEL: v_trig_preop_f64_imm:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_trig_preop_f64 v[0:1], v[0:1], 7
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7)
ret double %result
}
Expand Down Expand Up @@ -82,6 +83,18 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
; GFX10-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: s_trig_preop_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x8
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_trig_preop_f64 v[0:1], s[2:3], s0
; GFX11-NEXT: flat_store_b64 v[0:1], v[0:1] dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b)
store volatile double %result, double* undef
ret void
Expand All @@ -105,6 +118,16 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
; GFX10-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: s_trig_preop_f64_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
; GFX11-NEXT: flat_store_b64 v[0:1], v[0:1] dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7)
store volatile double %result, double* undef
ret void
Expand Down

0 comments on commit de3b5d7

Please sign in to comment.