45 changes: 30 additions & 15 deletions llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm-gfx10.mir
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX10 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s
# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX11 %s

# GFX10-LABEL: name: diffoporder_add
# GFX10: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0
# GFX10: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
# GCN-LABEL: name: diffoporder_add
# GCN: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0
# GCN: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0

name: diffoporder_add
body: |
Expand Down Expand Up @@ -43,19 +44,29 @@ body: |
...
---

# GFX10-LABEL: name: LowestInMiddle
# GCN-LABEL: name: LowestInMiddle
# GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 6400
# GFX10: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX10: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
# GFX11: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11200

# GCN: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GCN: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GCN: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
# GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 1600, 0
# GFX11: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -3200, 0
#
# GFX11: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 6400
# GFX11: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
# GFX11: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
# GFX11: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
# GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0,
# GFX11: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0,
#
# GFX10: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 11200
# GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
# GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
# GFX10: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
# GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0,
# GFX11: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0,

name: LowestInMiddle
body: |
Expand Down Expand Up @@ -101,18 +112,22 @@ body: |
...
---

# GFX10-LABEL: name: NegativeDistance
# GCN-LABEL: name: NegativeDistance
# GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 8192
# GFX10: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX10: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
# GFX11: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 10240
# GCN: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GCN: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GCN: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
# GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0
# GFX11: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -4096, 0
# GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0
# GFX11: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0
# GFX10: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 10240
# GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
# GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
# GFX10: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
# GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0
# GFX11: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0

name: NegativeDistance
body: |
Expand Down Expand Up @@ -194,9 +209,9 @@ body: |
...
---

# GFX10-LABEL: name: diffoporder_add_store
# GFX10: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0
# GFX10: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0
# GCN-LABEL: name: diffoporder_add_store
# GCN: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0
# GCN: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0

name: diffoporder_add_store
body: |
Expand Down
46 changes: 35 additions & 11 deletions llvm/test/CodeGen/AMDGPU/scratch-simple.ll

Large diffs are not rendered by default.

13 changes: 7 additions & 6 deletions llvm/test/CodeGen/AMDGPU/shift-select.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX6 %s
; RUN: llc -march=amdgcn -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8-10 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8-10 %s
; RUN: llc -march=amdgcn -mcpu=tahiti -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6 %s
; RUN: llc -march=amdgcn -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8PLUS %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8PLUS %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8PLUS %s

; GCN-LABEL: name: s_shl_i32
; GCN: S_LSHL_B32
Expand All @@ -12,7 +13,7 @@ define amdgpu_kernel void @s_shl_i32(i32 addrspace(1)* %out, i32 %lhs, i32 %rhs)

; GCN-LABEL: name: v_shl_i32
; GFX6: V_LSHL_B32_e32
; GFX8-10: V_LSHLREV_B32_e32
; GFX8PLUS: V_LSHLREV_B32_e32
define amdgpu_kernel void @v_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
Expand All @@ -33,7 +34,7 @@ define amdgpu_kernel void @s_lshr_i32(i32 addrspace(1)* %out, i32 %lhs, i32 %rhs

; GCN-LABEL: name: v_lshr_i32
; GFX6: V_LSHR_B32_e32
; GFX8-10: V_LSHRREV_B32_e64
; GFX8PLUS: V_LSHRREV_B32_e64
define amdgpu_kernel void @v_lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
Expand All @@ -54,7 +55,7 @@ define amdgpu_kernel void @s_ashr_i32(i32 addrspace(1)* %out, i32 %lhs, i32 %rhs

; GCN-LABEL: name: v_ashr_i32
; GFX6: V_ASHR_I32_e32
; GFX8-10: V_ASHRREV_I32_e64
; GFX8PLUS: V_ASHRREV_I32_e64
define amdgpu_kernel void @v_ashr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
Expand Down
95 changes: 48 additions & 47 deletions llvm/test/CodeGen/AMDGPU/smem-war-hazard.mir
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s
# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN %s

# GCN-LABEL: name: hazard_smem_war
# GCN: S_LOAD_DWORD_IMM
# GCN: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
# GCN: S_LOAD_DWORD_IMM
# GFX10-NEXT: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
---
name: hazard_smem_war
body: |
Expand Down Expand Up @@ -64,10 +65,10 @@ body: |
...

# GCN-LABEL: name: hazard_smem_war_only_smem
# GCN: S_LOAD_DWORD_IMM
# GCN-NEXT: S_LOAD_DWORD_IMM
# GCN-NEXT: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
# GCN: S_LOAD_DWORD_IMM
# GCN-NEXT: S_LOAD_DWORD_IMM
# GFX10-NEXT: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
---
name: hazard_smem_war_only_smem
body: |
Expand Down Expand Up @@ -95,10 +96,10 @@ body: |
...

# GCN-LABEL: name: hazard_smem_war_only_vmcnt_0
# GCN: S_LOAD_DWORD_IMM
# GCN-NEXT: S_WAITCNT 3952{{$}}
# GCN-NEXT: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
# GCN: S_LOAD_DWORD_IMM
# GCN-NEXT: S_WAITCNT 3952{{$}}
# GFX10-NEXT: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
---
name: hazard_smem_war_only_vmcnt_0
body: |
Expand All @@ -111,10 +112,10 @@ body: |
...

# GCN-LABEL: name: hazard_smem_war_only_expcnt_0
# GCN: S_LOAD_DWORD_IMM
# GCN-NEXT: S_WAITCNT 53007{{$}}
# GCN-NEXT: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
# GCN: S_LOAD_DWORD_IMM
# GCN-NEXT: S_WAITCNT 53007{{$}}
# GFX10-NEXT: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
---
name: hazard_smem_war_only_expcnt_0
body: |
Expand Down Expand Up @@ -157,10 +158,10 @@ body: |
...

# GCN-LABEL: name: hazard_smem_war_only_waitcnt_lgkmcnt_1
# GCN: S_LOAD_DWORD_IMM
# GCN-NEXT: S_WAITCNT_LGKMCNT
# GCN-NEXT: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
# GCN: S_LOAD_DWORD_IMM
# GCN-NEXT: S_WAITCNT_LGKMCNT
# GFX10-NEXT: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
---
name: hazard_smem_war_only_waitcnt_lgkmcnt_1
body: |
Expand All @@ -173,9 +174,9 @@ body: |
...

# GCN-LABEL: name: hazard_smem_war_branch
# GCN: S_LOAD_DWORD_IMM
# GCN: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
# GCN: S_LOAD_DWORD_IMM
# GFX10: $sgpr_null = S_MOV_B32 0
# GCN: V_CMP_EQ_F32
---
name: hazard_smem_war_branch
body: |
Expand All @@ -192,14 +193,14 @@ body: |
...

# GCN-LABEL: name: hazard_smem_war_cbranch
# GCN: S_AND_B64
# GCN: S_LOAD_DWORD_IMM
# GCN: S_CBRANCH_VCCZ
# GCN-NOT: $sgpr_null = S_MOV_B32 0
# GCN: V_CMP_EQ_F32
# GCN: S_ENDPGM 0
# GCN: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
# GCN: S_AND_B64
# GCN: S_LOAD_DWORD_IMM
# GCN: S_CBRANCH_VCCZ
# GFX10-NOT: $sgpr_null = S_MOV_B32 0
# GCN: V_CMP_EQ_F32
# GCN: S_ENDPGM 0
# GFX10: $sgpr_null = S_MOV_B32 0
# GCN: V_CMP_EQ_F32
---
name: hazard_smem_war_cbranch
body: |
Expand All @@ -222,16 +223,16 @@ body: |
...

# GCN-LABEL: name: hazard_smem_war_cbranch_carry
# GCN: S_AND_B64
# GCN: S_LOAD_DWORD_IMM
# GCN: S_CBRANCH_VCCZ
# GCN-NOT: $sgpr_null = S_MOV_B32 0
# GCN: V_CMP_EQ_F32
# GCN-NEXT: S_ENDPGM 0
# GCN-NOT: $sgpr_null = S_MOV_B32 0
# GCN: V_CMP_EQ_F32
# GCN: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
# GCN: S_AND_B64
# GCN: S_LOAD_DWORD_IMM
# GCN: S_CBRANCH_VCCZ
# GFX10-NOT: $sgpr_null = S_MOV_B32 0
# GCN: V_CMP_EQ_F32
# GCN-NEXT: S_ENDPGM 0
# GFX10-NOT: $sgpr_null = S_MOV_B32 0
# GCN: V_CMP_EQ_F32
# GFX10: $sgpr_null = S_MOV_B32 0
# GCN: V_CMP_EQ_F32
---
name: hazard_smem_war_cbranch_carry
body: |
Expand Down Expand Up @@ -259,9 +260,9 @@ body: |
...

# GCN-LABEL: name: hazard_smem_war_backedge
# GCN: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
# GCN: S_LOAD_DWORD_IMM
# GFX10: $sgpr_null = S_MOV_B32 0
# GCN: V_CMP_EQ_F32
# GCN: S_LOAD_DWORD_IMM
---
name: hazard_smem_war_backedge
body: |
Expand All @@ -278,7 +279,7 @@ body: |

# GCN-LABEL: name: hazard_smem_war_impdef
# GCN: S_LOAD_DWORD_IMM
# GCN: $sgpr_null = S_MOV_B32 0
# GFX10: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
---
name: hazard_smem_war_impdef
Expand All @@ -292,7 +293,7 @@ body: |

# GCN-LABEL: name: hazard_smem_war_readlane
# GCN: S_LOAD_DWORD_IMM
# GCN: $sgpr_null = S_MOV_B32 0
# GFX10: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_READLANE_B32
---
name: hazard_smem_war_readlane
Expand All @@ -306,7 +307,7 @@ body: |

# GCN-LABEL: name: hazard_smem_war_readfirstlane
# GCN: S_LOAD_DWORD_IMM
# GCN: $sgpr_null = S_MOV_B32 0
# GFX10: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_READFIRSTLANE_B32
---
name: hazard_smem_war_readfirstlane
Expand Down
15 changes: 10 additions & 5 deletions llvm/test/CodeGen/AMDGPU/smrd-gfx10.ll
Original file line number Diff line number Diff line change
@@ -1,31 +1,36 @@
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX11 %s

; GCN-LABEL: {{^}}smrd_imm_dlc:
; GCN: s_buffer_load_dword s0, s[0:3], 0x0 dlc ; encoding: [0x00,0x40,0x20,0xf4,0x00,0x00,0x00,0xfa]
; GFX10: s_buffer_load_dword s0, s[0:3], 0x0 dlc ; encoding: [0x00,0x40,0x20,0xf4,0x00,0x00,0x00,0xfa]
; GFX11: s_buffer_load_b32 s0, s[0:3], 0x0 dlc ; encoding: [0x00,0x20,0x20,0xf4,0x00,0x00,0x00,0xf8]
; Test input: amdgpu_ps function that returns the float produced by a single
; llvm.amdgcn.s.buffer.load.f32 call on the inreg descriptor %desc.
; The second call operand is the constant 0 and the last operand is 4; the
; check lines preceding this function expect that to be printed with an
; immediate 0x0 offset and the "dlc" modifier.
; NOTE(review): the last operand is presumably the cache-policy bitfield;
; confirm against the intrinsic definition.
define amdgpu_ps float @smrd_imm_dlc(<4 x i32> inreg %desc) #0 {
main_body:
%r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 0, i32 4)
ret float %r
}

; GCN-LABEL: {{^}}smrd_sgpr_dlc:
; GCN: s_buffer_load_dword s0, s[0:3], s4 dlc ; encoding: [0x00,0x40,0x20,0xf4,0x00,0x00,0x00,0x08]
; GFX10: s_buffer_load_dword s0, s[0:3], s4 dlc ; encoding: [0x00,0x40,0x20,0xf4,0x00,0x00,0x00,0x08]
; GFX11: s_buffer_load_b32 s0, s[0:3], s4 dlc ; encoding: [0x00,0x20,0x20,0xf4,0x00,0x00,0x00,0x08]
; Test input: same shape as the immediate-offset case, but the second call
; operand is the inreg argument %offset instead of a constant. The last
; operand is still 4; the check lines preceding this function expect the
; offset to be printed as an SGPR (s4) together with the "dlc" modifier.
; NOTE(review): the last operand is presumably the cache-policy bitfield;
; confirm against the intrinsic definition.
define amdgpu_ps float @smrd_sgpr_dlc(<4 x i32> inreg %desc, i32 inreg %offset) #0 {
main_body:
%r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %offset, i32 4)
ret float %r
}

; GCN-LABEL: {{^}}smrd_imm_glc_dlc:
; GCN: s_buffer_load_dword s0, s[0:3], 0x0 glc dlc ; encoding: [0x00,0x40,0x21,0xf4,0x00,0x00,0x00,0xfa]
; GFX10: s_buffer_load_dword s0, s[0:3], 0x0 glc dlc ; encoding: [0x00,0x40,0x21,0xf4,0x00,0x00,0x00,0xfa]
; GFX11: s_buffer_load_b32 s0, s[0:3], 0x0 glc dlc ; encoding: [0x00,0x60,0x20,0xf4,0x00,0x00,0x00,0xf8]
; Test input: amdgpu_ps function that returns the float produced by a single
; llvm.amdgcn.s.buffer.load.f32 call on the inreg descriptor %desc.
; The second call operand is the constant 0 and the last operand is 5; the
; check lines preceding this function expect an immediate 0x0 offset with
; both the "glc" and "dlc" modifiers printed.
; NOTE(review): the last operand is presumably the cache-policy bitfield
; (5 selecting both modifiers); confirm against the intrinsic definition.
define amdgpu_ps float @smrd_imm_glc_dlc(<4 x i32> inreg %desc) #0 {
main_body:
%r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 0, i32 5)
ret float %r
}

; GCN-LABEL: {{^}}smrd_sgpr_glc_dlc:
; GCN: s_buffer_load_dword s0, s[0:3], s4 glc dlc ; encoding: [0x00,0x40,0x21,0xf4,0x00,0x00,0x00,0x08]
; GFX10: s_buffer_load_dword s0, s[0:3], s4 glc dlc ; encoding: [0x00,0x40,0x21,0xf4,0x00,0x00,0x00,0x08]
; GFX11: s_buffer_load_b32 s0, s[0:3], s4 glc dlc ; encoding: [0x00,0x60,0x20,0xf4,0x00,0x00,0x00,0x08]
define amdgpu_ps float @smrd_sgpr_glc_dlc(<4 x i32> inreg %desc, i32 inreg %offset) #0 {
main_body:
%r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %offset, i32 5)
Expand Down
22 changes: 12 additions & 10 deletions llvm/test/CodeGen/AMDGPU/vcmpx-exec-war-hazard.mir
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s
# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s

# GCN-LABEL: name: hazard_vcmpx_smov_exec_lo
# GCN: $sgpr0 = S_MOV_B32 $exec_lo
# GCN-NEXT: S_WAITCNT_DEPCTR 65534
# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
# GCN: $sgpr0 = S_MOV_B32 $exec_lo
# GFX10-NEXT: S_WAITCNT_DEPCTR 65534
# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
---
name: hazard_vcmpx_smov_exec_lo
body: |
Expand All @@ -19,9 +20,9 @@ body: |
...

# GCN-LABEL: name: hazard_vcmpx_smov_exec
# GCN: $sgpr0_sgpr1 = S_MOV_B64 $exec
# GCN-NEXT: S_WAITCNT_DEPCTR 65534
# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
# GCN: $sgpr0_sgpr1 = S_MOV_B64 $exec
# GFX10-NEXT: S_WAITCNT_DEPCTR 65534
# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
---
name: hazard_vcmpx_smov_exec
body: |
Expand Down Expand Up @@ -145,9 +146,10 @@ body: |
...

# GCN-LABEL: name: hazard_vcmpx_smov_exec_lo_depctr_effe
# GCN: $sgpr0 = S_MOV_B32 $exec_lo
# GCN: S_WAITCNT_DEPCTR 65534
# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
# GCN: $sgpr0 = S_MOV_B32 $exec_lo
# GCN-NEXT: S_WAITCNT_DEPCTR 61438
# GFX10-NEXT: S_WAITCNT_DEPCTR 65534
# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
---
name: hazard_vcmpx_smov_exec_lo_depctr_effe
body: |
Expand Down
149 changes: 76 additions & 73 deletions llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s
# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN %s

# GCN-LABEL: name: vmem_write_sgpr
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
---
name: vmem_write_sgpr
body: |
Expand All @@ -15,9 +16,9 @@ body: |
$sgpr0 = S_MOV_B32 0
...
# GCN-LABEL: name: vmem_write_exec
# GCN: BUFFER_STORE_DWORD_OFFEN_exact
# GCN-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
# GCN: BUFFER_STORE_DWORD_OFFEN_exact
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
---
name: vmem_write_exec
body: |
Expand All @@ -30,13 +31,13 @@ body: |
$exec_lo = S_MOV_B32 -1
...
# GCN-LABEL: name: vmem_write_sgpr_chain
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_MOV_B32
# GCN-NEXT: S_MOV_B32
# GCN-NEXT: S_MOV_B32
# GCN-NEXT: S_MOV_B32
# GCN-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_MOV_B32
# GCN-NEXT: S_MOV_B32
# GCN-NEXT: S_MOV_B32
# GCN-NEXT: S_MOV_B32
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
---
name: vmem_write_sgpr_chain
body: |
Expand All @@ -53,9 +54,9 @@ body: |
$sgpr0 = S_MOV_B32 0
...
# GCN-LABEL: name: vmem_smem_write_sgpr
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_LOAD_DWORD_IMM
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_LOAD_DWORD_IMM
---
name: vmem_smem_write_sgpr
body: |
Expand All @@ -67,10 +68,10 @@ body: |
$sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
...
# GCN-LABEL: name: vmem_snop_write_sgpr
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_NOP
# GCN-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_NOP
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
---
name: vmem_snop_write_sgpr
body: |
Expand Down Expand Up @@ -113,10 +114,10 @@ body: |
$sgpr0 = S_MOV_B32 0
...
# GCN-LABEL: name: vmem_swait_any_write_sgpr
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_WAITCNT
# GCN-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_WAITCNT
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
---
name: vmem_swait_any_write_sgpr
body: |
Expand All @@ -129,9 +130,9 @@ body: |
$sgpr0 = S_MOV_B32 0
...
# GCN-LABEL: name: vmem_write_exec_impread
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B64
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B64
---
name: vmem_write_exec_impread
body: |
Expand All @@ -143,9 +144,9 @@ body: |
$exec = S_MOV_B64 7
...
# GCN-LABEL: name: vmem_write_exec_expread
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B64
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B64
---
name: vmem_write_exec_expread
body: |
Expand All @@ -156,9 +157,9 @@ body: |
$exec = S_MOV_B64 7
...
# GCN-LABEL: name: ds_write_m0
# GCN: DS_READ_B32
# GCN-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
# GCN: DS_READ_B32
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
---
name: ds_write_m0
body: |
Expand All @@ -170,9 +171,10 @@ body: |
$m0 = S_MOV_B32 7
...
# GCN-LABEL: name: vmem_write_sgpr_fall_through
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN: bb.1:
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
---
name: vmem_write_sgpr_fall_through
body: |
Expand All @@ -187,10 +189,11 @@ body: |
$sgpr0 = S_MOV_B32 0
...
# GCN-LABEL: name: vmem_write_sgpr_branch
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_BRANCH
# GCN: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_BRANCH
# GCN: bb.1:
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
---
name: vmem_write_sgpr_branch
body: |
Expand All @@ -206,11 +209,11 @@ body: |
$sgpr0 = S_MOV_B32 0
...
# GCN-LABEL: name: vmem_write_sgpr_branch_around
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_BRANCH
# GCN: bb.2:
# GCN-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_BRANCH
# GCN: bb.2:
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
---
name: vmem_write_sgpr_branch_around
body: |
Expand All @@ -230,15 +233,15 @@ body: |
$sgpr0 = S_MOV_B32 0
...
# GCN-LABEL: name: vmem_write_sgpr_cbranch_around
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_CBRANCH
# GCN-NEXT: S_BRANCH
# GCN: bb.1:
# GCN: S_WAITCNT
# GCN: V_ADD_CO_U32
# GCN: bb.2:
# GCN-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_CBRANCH
# GCN-NEXT: S_BRANCH
# GCN: bb.1:
# GCN: S_WAITCNT
# GCN: V_ADD_CO_U32
# GCN: bb.2:
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
---
name: vmem_write_sgpr_cbranch_around
body: |
Expand All @@ -261,9 +264,9 @@ body: |
$sgpr0 = S_MOV_B32 0
...
# GCN-LABEL: name: vmem_write_sgpr_branch_backedge
# GCN: $vgpr0 = IMPLICIT_DEF
# GCN-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
# GCN: $vgpr0 = IMPLICIT_DEF
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
---
name: vmem_write_sgpr_branch_backedge
body: |
Expand All @@ -279,9 +282,9 @@ body: |
S_BRANCH %bb.0
...
# GCN-LABEL: name: ds_write_exec
# GCN: DS_WRITE_B32_gfx9
# GCN-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
# GCN: DS_WRITE_B32_gfx9
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
---
name: ds_write_exec
body: |
Expand All @@ -292,9 +295,9 @@ body: |
$exec_lo = S_MOV_B32 -1
...
# GCN-LABEL: name: vmem_scratch_exec
# GCN: SCRATCH_LOAD_DWORD
# GCN-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
# GCN: SCRATCH_LOAD_DWORD
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
---
name: vmem_scratch_exec
body: |
Expand All @@ -304,9 +307,9 @@ body: |
$exec_lo = S_MOV_B32 -1
...
# GCN-LABEL: name: vmem_flat_exec
# GCN: FLAT_LOAD_DWORD
# GCN-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
# GCN: FLAT_LOAD_DWORD
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
---
name: vmem_flat_exec
body: |
Expand All @@ -317,9 +320,9 @@ body: |
$exec_lo = S_MOV_B32 -1
...
# GCN-LABEL: name: vmem_global_exec
# GCN: GLOBAL_LOAD_DWORD
# GCN-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
# GCN: GLOBAL_LOAD_DWORD
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
---
name: vmem_global_exec
body: |
Expand All @@ -330,9 +333,9 @@ body: |
$exec_lo = S_MOV_B32 -1
...
# GCN-LABEL: name: vmem_global_atomic_exec
# GCN: GLOBAL_ATOMIC_ADD_RTN
# GCN-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
# GCN: GLOBAL_ATOMIC_ADD_RTN
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT: S_MOV_B32
---
name: vmem_global_atomic_exec
body: |
Expand Down
63 changes: 32 additions & 31 deletions llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
; RUN: llc -march=amdgcn -mcpu=gfx802 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8_9 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9_10,GFX8_9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-back-off-barrier -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX9_10 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9PLUS,GFX8_9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-back-off-barrier -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX9PLUS %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-back-off-barrier -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX9PLUS %s

; GCN-LABEL: barrier_vmcnt_global:
; GFX8: flat_load_dword
; GFX9_10: global_load_dword
; GFX9PLUS: global_load_{{dword|b32}}
; GFX8: s_waitcnt vmcnt(0){{$}}
; GFX9_10: s_waitcnt vmcnt(0){{$}}
; GFX9PLUS: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: s_barrier
define amdgpu_kernel void @barrier_vmcnt_global(i32 addrspace(1)* %arg) {
bb:
Expand All @@ -27,10 +28,10 @@ bb:

; GCN-LABEL: barrier_vscnt_global:
; GFX8: flat_store_dword
; GFX9_10: global_store_dword
; GFX9PLUS: global_store_{{dword|b32}}
; GFX8: s_waitcnt vmcnt(0){{$}}
; GFX9: s_waitcnt vmcnt(0){{$}}
; GFX10: s_waitcnt_vscnt null, 0x0
; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_barrier
define amdgpu_kernel void @barrier_vscnt_global(i32 addrspace(1)* %arg) {
bb:
Expand All @@ -53,10 +54,10 @@ bb:

; GCN-LABEL: barrier_vmcnt_vscnt_global:
; GFX8: flat_load_dword
; GFX9_10: global_load_dword
; GFX9PLUS: global_load_{{dword|b32}}
; GFX8: s_waitcnt vmcnt(0){{$}}
; GFX9_10: s_waitcnt vmcnt(0){{$}}
; GFX10: s_waitcnt_vscnt null, 0x0
; GFX9PLUS: s_waitcnt vmcnt(0){{$}}
; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_barrier
define amdgpu_kernel void @barrier_vmcnt_vscnt_global(i32 addrspace(1)* %arg) {
bb:
Expand All @@ -80,7 +81,7 @@ bb:
}

; GCN-LABEL: barrier_vmcnt_flat:
; GCN: flat_load_dword
; GCN: flat_load_{{dword|b32}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: s_barrier
define amdgpu_kernel void @barrier_vmcnt_flat(i32* %arg) {
Expand All @@ -101,10 +102,10 @@ bb:
}

; GCN-LABEL: barrier_vscnt_flat:
; GCN: flat_store_dword
; GCN: flat_store_{{dword|b32}}
; GFX8_9: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: s_waitcnt lgkmcnt(0){{$}}
; GFX10: s_waitcnt_vscnt null, 0x0
; GFX10PLUS: s_waitcnt lgkmcnt(0){{$}}
; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_barrier
define amdgpu_kernel void @barrier_vscnt_flat(i32* %arg) {
bb:
Expand All @@ -126,9 +127,9 @@ bb:
}

; GCN-LABEL: barrier_vmcnt_vscnt_flat:
; GCN: flat_load_dword
; GCN: flat_load_{{dword|b32}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: s_waitcnt_vscnt null, 0x0
; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_barrier
define amdgpu_kernel void @barrier_vmcnt_vscnt_flat(i32* %arg) {
bb:
Expand All @@ -152,11 +153,11 @@ bb:
}

; GCN-LABEL: barrier_vmcnt_vscnt_flat_workgroup:
; GCN: flat_load_dword
; GCN: flat_load_{{dword|b32}}
; GFX8_9: s_waitcnt lgkmcnt(0){{$}}
; GFX8_9: s_waitcnt vmcnt(0){{$}}
; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: s_waitcnt_vscnt null, 0x0
; GFX10PLUS: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_barrier
define amdgpu_kernel void @barrier_vmcnt_vscnt_flat_workgroup(i32* %arg) {
bb:
Expand All @@ -181,10 +182,10 @@ bb:

; GCN-LABEL: load_vmcnt_global:
; GFX8: flat_load_dword
; GFX9_10: global_load_dword
; GFX9PLUS: global_load_{{dword|b32}}
; GFX8: s_waitcnt vmcnt(0){{$}}
; GFX9_10: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: {{global|flat}}_store_dword
; GFX9PLUS: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: {{global|flat}}_store_{{dword|b32}}
define amdgpu_kernel void @load_vmcnt_global(i32 addrspace(1)* %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
Expand All @@ -200,10 +201,10 @@ bb:
}

; GCN-LABEL: load_vmcnt_flat:
; GCN: flat_load_dword
; GCN: flat_load_{{dword|b32}}
; GCN-NOT: vscnt
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: {{global|flat}}_store_dword
; GCN-NEXT: {{global|flat}}_store_{{dword|b32}}
define amdgpu_kernel void @load_vmcnt_flat(i32* %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
Expand All @@ -219,9 +220,9 @@ bb:
}

; GCN-LABEL: store_vscnt_private:
; GCN: buffer_store_dword
; GCN: {{buffer|scratch}}_store_{{dword|b32}}
; GFX8_9: s_waitcnt vmcnt(0)
; GFX10: s_waitcnt_vscnt null, 0x0
; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_setpc_b64
define void @store_vscnt_private(i32 addrspace(5)* %p) {
store i32 0, i32 addrspace(5)* %p
Expand All @@ -230,20 +231,20 @@ define void @store_vscnt_private(i32 addrspace(5)* %p) {

; GCN-LABEL: store_vscnt_global:
; GFX8: flat_store_dword
; GFX9_10: global_store_dword
; GFX9PLUS: global_store_{{dword|b32}}
; GFX8_9: s_waitcnt vmcnt(0)
; GFX10: s_waitcnt_vscnt null, 0x0
; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_setpc_b64
; Test input: non-kernel function that stores the constant i32 0 through the
; addrspace(1) pointer %p and returns. The check lines preceding this
; function look for the store, then a wait instruction, with s_setpc_b64
; required on the line immediately after that wait.
define void @store_vscnt_global(i32 addrspace(1)* %p) {
; The store is the only memory operation in the function.
store i32 0, i32 addrspace(1)* %p
ret void
}

; GCN-LABEL: store_vscnt_flat:
; GCN: flat_store_dword
; GCN: flat_store_{{dword|b32}}
; GFX8_9: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10: s_waitcnt lgkmcnt(0){{$}}
; GFX10: s_waitcnt_vscnt null, 0x0
; GFX10PLUS: s_waitcnt lgkmcnt(0){{$}}
; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_setpc_b64
define void @store_vscnt_flat(i32* %p) {
store i32 0, i32* %p
Expand All @@ -252,7 +253,7 @@ define void @store_vscnt_flat(i32* %p) {

; GCN-LABEL: function_prologue:
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0){{$}}
; GFX10: s_waitcnt_vscnt null, 0x0
; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_setpc_b64
define void @function_prologue() {
ret void
Expand Down
8 changes: 5 additions & 3 deletions llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.mir
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX10 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s
# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GCN,GFX11 %s

# GFX10-LABEL: waitcnt-vscnt
# GFX10: GLOBAL_ATOMIC_ADD_RTN
# GCN-LABEL: waitcnt-vscnt
# GCN: GLOBAL_ATOMIC_ADD_RTN
# GFX10-NEXT: S_WAITCNT 49279
# GFX11-NEXT: S_WAITCNT 64519
---
name: waitcnt-vscnt
machineFunctionInfo:
Expand Down