92 changes: 44 additions & 48 deletions llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,19 @@ define amdgpu_kernel void @module_0_kernel_normal_extern_normal(i32 %idx) #1 {
define amdgpu_kernel void @module_1_kernel_normal_extern_normal(i32 %idx) {
; CHECK-LABEL: module_1_kernel_normal_extern_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_u32 s6, s6, s9
; CHECK-NEXT: s_add_u32 s8, s8, s11
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_addc_u32 s7, s7, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
; CHECK-NEXT: s_add_u32 s0, s0, s9
; CHECK-NEXT: s_addc_u32 s9, s9, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
; CHECK-NEXT: s_add_u32 s0, s0, s11
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_add_u32 s8, s4, 8
; CHECK-NEXT: s_addc_u32 s9, s5, 0
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_module@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_module@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[10:11], s[6:7], 0x0
; CHECK-NEXT: s_load_dword s12, s[4:5], 0x0
; CHECK-NEXT: s_getpc_b64 s[8:9]
; CHECK-NEXT: s_add_u32 s8, s8, use_module@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s9, s9, use_module@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x0
; CHECK-NEXT: s_load_dword s12, s[6:7], 0x0
; CHECK-NEXT: s_mov_b64 s[6:7], s[4:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[10:11]
; CHECK-NEXT: s_lshl_b32 s4, s12, 2
Expand Down Expand Up @@ -118,20 +117,19 @@ define amdgpu_kernel void @module_0_kernel_overalign_extern_normal(i32 %idx) #1
define amdgpu_kernel void @module_1_kernel_overalign_extern_normal(i32 %idx) {
; CHECK-LABEL: module_1_kernel_overalign_extern_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_u32 s6, s6, s9
; CHECK-NEXT: s_add_u32 s8, s8, s11
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_addc_u32 s7, s7, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
; CHECK-NEXT: s_add_u32 s0, s0, s9
; CHECK-NEXT: s_addc_u32 s9, s9, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
; CHECK-NEXT: s_add_u32 s0, s0, s11
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_add_u32 s8, s4, 8
; CHECK-NEXT: s_addc_u32 s9, s5, 0
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_module@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_module@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[10:11], s[6:7], 0x0
; CHECK-NEXT: s_load_dword s12, s[4:5], 0x0
; CHECK-NEXT: s_getpc_b64 s[8:9]
; CHECK-NEXT: s_add_u32 s8, s8, use_module@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s9, s9, use_module@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x0
; CHECK-NEXT: s_load_dword s12, s[6:7], 0x0
; CHECK-NEXT: s_mov_b64 s[6:7], s[4:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[10:11]
; CHECK-NEXT: s_lshl_b32 s4, s12, 2
Expand Down Expand Up @@ -177,20 +175,19 @@ define amdgpu_kernel void @module_0_kernel_normal_extern_overalign(i32 %idx) #1
define amdgpu_kernel void @module_1_kernel_normal_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_1_kernel_normal_extern_overalign:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_u32 s6, s6, s9
; CHECK-NEXT: s_add_u32 s8, s8, s11
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_addc_u32 s7, s7, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
; CHECK-NEXT: s_add_u32 s0, s0, s9
; CHECK-NEXT: s_addc_u32 s9, s9, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
; CHECK-NEXT: s_add_u32 s0, s0, s11
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_add_u32 s8, s4, 8
; CHECK-NEXT: s_addc_u32 s9, s5, 0
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_module@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_module@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[10:11], s[6:7], 0x0
; CHECK-NEXT: s_load_dword s12, s[4:5], 0x0
; CHECK-NEXT: s_getpc_b64 s[8:9]
; CHECK-NEXT: s_add_u32 s8, s8, use_module@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s9, s9, use_module@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x0
; CHECK-NEXT: s_load_dword s12, s[6:7], 0x0
; CHECK-NEXT: s_mov_b64 s[6:7], s[4:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[10:11]
; CHECK-NEXT: s_lshl_b32 s4, s12, 2
Expand Down Expand Up @@ -236,20 +233,19 @@ define amdgpu_kernel void @module_0_kernel_overalign_extern_overalign(i32 %idx)
define amdgpu_kernel void @module_1_kernel_overalign_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_1_kernel_overalign_extern_overalign:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_u32 s6, s6, s9
; CHECK-NEXT: s_add_u32 s8, s8, s11
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_addc_u32 s7, s7, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
; CHECK-NEXT: s_add_u32 s0, s0, s9
; CHECK-NEXT: s_addc_u32 s9, s9, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
; CHECK-NEXT: s_add_u32 s0, s0, s11
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_add_u32 s8, s4, 8
; CHECK-NEXT: s_addc_u32 s9, s5, 0
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_module@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_module@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[10:11], s[6:7], 0x0
; CHECK-NEXT: s_load_dword s12, s[4:5], 0x0
; CHECK-NEXT: s_getpc_b64 s[8:9]
; CHECK-NEXT: s_add_u32 s8, s8, use_module@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s9, s9, use_module@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x0
; CHECK-NEXT: s_load_dword s12, s[6:7], 0x0
; CHECK-NEXT: s_mov_b64 s[6:7], s[4:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[10:11]
; CHECK-NEXT: s_lshl_b32 s4, s12, 2
Expand Down
9 changes: 3 additions & 6 deletions llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,10 @@ define void @func_use_lds_global() {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: s_mov_b64 s[4:5], 0
; GFX8-NEXT: ds_write_b32 v0, v0
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_mov_b64 s[0:1], s[6:7]
; GFX8-NEXT: s_trap 2
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_use_lds_global:
Expand All @@ -38,9 +37,7 @@ define void @func_use_lds_global_constexpr_cast() {
; GFX8-LABEL: func_use_lds_global_constexpr_cast:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b64 s[4:5], 0
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_mov_b64 s[0:1], s[6:7]
; GFX8-NEXT: s_trap 2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs --amdhsa-code-object-version=4 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; GCN-LABEL: {{^}}is_private_vgpr:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs --amdhsa-code-object-version=4 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; GCN-LABEL: {{^}}is_local_vgpr:
Expand Down
244 changes: 122 additions & 122 deletions llvm/test/CodeGen/AMDGPU/lower-kernargs.ll

Large diffs are not rendered by default.

92 changes: 44 additions & 48 deletions llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,20 @@ store i32 0, i32 addrspace(3)* @used_by_kernel
define amdgpu_kernel void @withcall() {
; GFX9-LABEL: withcall:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX9-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX9-NEXT: s_mov_b32 s14, -1
; GFX9-NEXT: s_mov_b32 s15, 0xe00000
; GFX9-NEXT: s_add_u32 s12, s12, s3
; GFX9-NEXT: s_addc_u32 s13, s13, 0
; GFX9-NEXT: s_add_u32 s8, s0, 36
; GFX9-NEXT: s_addc_u32 s9, s1, 0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, nonkernel@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, nonkernel@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX9-NEXT: s_mov_b64 s[0:1], s[12:13]
; GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; GFX9-NEXT: s_mov_b32 s10, -1
; GFX9-NEXT: s_mov_b32 s11, 0xe00000
; GFX9-NEXT: s_add_u32 s8, s8, s3
; GFX9-NEXT: s_addc_u32 s9, s9, 0
; GFX9-NEXT: s_getpc_b64 s[2:3]
; GFX9-NEXT: s_add_u32 s2, s2, nonkernel@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s3, s3, nonkernel@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX9-NEXT: s_mov_b64 s[0:1], s[8:9]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b64 s[2:3], s[14:15]
; GFX9-NEXT: s_mov_b64 s[2:3], s[10:11]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: ds_write_b32 v0, v0 offset:8
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -51,21 +50,20 @@ define amdgpu_kernel void @withcall() {
;
; GFX10-LABEL: withcall:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX10-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX10-NEXT: s_mov_b32 s14, -1
; GFX10-NEXT: s_mov_b32 s15, 0x31c16000
; GFX10-NEXT: s_add_u32 s12, s12, s3
; GFX10-NEXT: s_addc_u32 s13, s13, 0
; GFX10-NEXT: s_add_u32 s8, s0, 36
; GFX10-NEXT: s_addc_u32 s9, s1, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, nonkernel@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, nonkernel@gotpcrel32@hi+12
; GFX10-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; GFX10-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; GFX10-NEXT: s_mov_b32 s10, -1
; GFX10-NEXT: s_mov_b32 s11, 0x31c16000
; GFX10-NEXT: s_add_u32 s8, s8, s3
; GFX10-NEXT: s_addc_u32 s9, s9, 0
; GFX10-NEXT: s_getpc_b64 s[2:3]
; GFX10-NEXT: s_add_u32 s2, s2, nonkernel@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s3, s3, nonkernel@gotpcrel32@hi+12
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX10-NEXT: s_mov_b64 s[0:1], s[12:13]
; GFX10-NEXT: s_mov_b64 s[2:3], s[14:15]
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
; GFX10-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX10-NEXT: s_mov_b64 s[0:1], s[8:9]
; GFX10-NEXT: s_mov_b64 s[2:3], s[10:11]
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: ds_write_b32 v0, v0 offset:8
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -74,22 +72,21 @@ define amdgpu_kernel void @withcall() {
;
; G_GFX9-LABEL: withcall:
; G_GFX9: ; %bb.0:
; G_GFX9-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; G_GFX9-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; G_GFX9-NEXT: s_mov_b32 s14, -1
; G_GFX9-NEXT: s_mov_b32 s15, 0xe00000
; G_GFX9-NEXT: s_add_u32 s12, s12, s3
; G_GFX9-NEXT: s_addc_u32 s13, s13, 0
; G_GFX9-NEXT: s_add_u32 s8, s0, 36
; G_GFX9-NEXT: s_addc_u32 s9, s1, 0
; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; G_GFX9-NEXT: s_mov_b32 s10, -1
; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000
; G_GFX9-NEXT: s_add_u32 s8, s8, s3
; G_GFX9-NEXT: s_addc_u32 s9, s9, 0
; G_GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
; G_GFX9-NEXT: s_getpc_b64 s[0:1]
; G_GFX9-NEXT: s_add_u32 s0, s0, nonkernel@gotpcrel32@lo+4
; G_GFX9-NEXT: s_addc_u32 s1, s1, nonkernel@gotpcrel32@hi+12
; G_GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; G_GFX9-NEXT: s_mov_b64 s[0:1], s[12:13]
; G_GFX9-NEXT: s_mov_b64 s[0:1], s[8:9]
; G_GFX9-NEXT: v_mov_b32_e32 v0, 0
; G_GFX9-NEXT: v_mov_b32_e32 v1, 8
; G_GFX9-NEXT: s_mov_b64 s[2:3], s[14:15]
; G_GFX9-NEXT: s_mov_b64 s[2:3], s[10:11]
; G_GFX9-NEXT: s_mov_b32 s32, 0
; G_GFX9-NEXT: ds_write_b32 v1, v0
; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -98,22 +95,21 @@ define amdgpu_kernel void @withcall() {
;
; G_GFX10-LABEL: withcall:
; G_GFX10: ; %bb.0:
; G_GFX10-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; G_GFX10-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; G_GFX10-NEXT: s_mov_b32 s14, -1
; G_GFX10-NEXT: s_mov_b32 s15, 0x31c16000
; G_GFX10-NEXT: s_add_u32 s12, s12, s3
; G_GFX10-NEXT: s_addc_u32 s13, s13, 0
; G_GFX10-NEXT: s_add_u32 s8, s0, 36
; G_GFX10-NEXT: s_addc_u32 s9, s1, 0
; G_GFX10-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; G_GFX10-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; G_GFX10-NEXT: s_mov_b32 s10, -1
; G_GFX10-NEXT: s_mov_b32 s11, 0x31c16000
; G_GFX10-NEXT: s_add_u32 s8, s8, s3
; G_GFX10-NEXT: s_addc_u32 s9, s9, 0
; G_GFX10-NEXT: s_mov_b64 s[6:7], s[0:1]
; G_GFX10-NEXT: s_getpc_b64 s[0:1]
; G_GFX10-NEXT: s_add_u32 s0, s0, nonkernel@gotpcrel32@lo+4
; G_GFX10-NEXT: s_addc_u32 s1, s1, nonkernel@gotpcrel32@hi+12
; G_GFX10-NEXT: v_mov_b32_e32 v0, 0
; G_GFX10-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; G_GFX10-NEXT: v_mov_b32_e32 v1, 8
; G_GFX10-NEXT: s_mov_b64 s[0:1], s[12:13]
; G_GFX10-NEXT: s_mov_b64 s[2:3], s[14:15]
; G_GFX10-NEXT: s_mov_b64 s[0:1], s[8:9]
; G_GFX10-NEXT: s_mov_b64 s[2:3], s[10:11]
; G_GFX10-NEXT: s_mov_b32 s32, 0
; G_GFX10-NEXT: ds_write_b32 v1, v0
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
Expand Down
154 changes: 4 additions & 150 deletions llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py

; XFAIL: *

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=DEFAULTSIZE,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdhsa-code-object-version=5 < %s | FileCheck -check-prefixes=DEFAULTSIZE-V5,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 < %s | FileCheck -check-prefixes=ASSUME1024,MUBUF %s
Expand Down Expand Up @@ -54,42 +51,6 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_endpgm
;
; DEFAULTSIZE-V5-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align64:
; DEFAULTSIZE-V5: ; %bb.0: ; %entry
; DEFAULTSIZE-V5-NEXT: s_add_u32 s0, s0, s9
; DEFAULTSIZE-V5-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x8
; DEFAULTSIZE-V5-NEXT: s_addc_u32 s1, s1, 0
; DEFAULTSIZE-V5-NEXT: s_movk_i32 s32, 0x400
; DEFAULTSIZE-V5-NEXT: s_mov_b32 s33, 0
; DEFAULTSIZE-V5-NEXT: s_waitcnt lgkmcnt(0)
; DEFAULTSIZE-V5-NEXT: s_cmp_lg_u32 s8, 0
; DEFAULTSIZE-V5-NEXT: s_cbranch_scc1 .LBB0_3
; DEFAULTSIZE-V5-NEXT: ; %bb.1: ; %bb.0
; DEFAULTSIZE-V5-NEXT: s_cmp_lg_u32 s9, 0
; DEFAULTSIZE-V5-NEXT: s_cbranch_scc1 .LBB0_3
; DEFAULTSIZE-V5-NEXT: ; %bb.2: ; %bb.1
; DEFAULTSIZE-V5-NEXT: s_add_i32 s6, s32, 0x1000
; DEFAULTSIZE-V5-NEXT: s_lshl_b32 s7, s10, 2
; DEFAULTSIZE-V5-NEXT: s_mov_b32 s32, s6
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v1, 0
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v2, s6
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v3, 1
; DEFAULTSIZE-V5-NEXT: s_add_i32 s6, s6, s7
; DEFAULTSIZE-V5-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
; DEFAULTSIZE-V5-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:4
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v2, s6
; DEFAULTSIZE-V5-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
; DEFAULTSIZE-V5-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0)
; DEFAULTSIZE-V5-NEXT: v_add_u32_e32 v0, v2, v0
; DEFAULTSIZE-V5-NEXT: s_waitcnt lgkmcnt(0)
; DEFAULTSIZE-V5-NEXT: global_store_dword v1, v0, s[4:5]
; DEFAULTSIZE-V5-NEXT: .LBB0_3: ; %bb.2
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v0, 0
; DEFAULTSIZE-V5-NEXT: global_store_dword v[0:1], v0, off
; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0)
; DEFAULTSIZE-V5-NEXT: s_endpgm
;
; FLATSCR-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align4:
; FLATSCR: ; %bb.0: ; %entry
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5
Expand Down Expand Up @@ -149,8 +110,8 @@ bb.2:
store volatile i32 0, i32 addrspace(1)* undef
ret void
}
; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 16
; DEFAULTSIZE: ; ScratchSize: 16
; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4112
; DEFAULTSIZE: ; ScratchSize: 4112
; DEFAULTSIZE-V5: .amdhsa_private_segment_fixed_size 16
; DEFAULTSIZE-V5: .amdhsa_uses_dynamic_stack 1
; DEFAULTSIZE-V5: ; ScratchSize: 16
Expand Down Expand Up @@ -193,40 +154,6 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_endpgm
;
; DEFAULTSIZE-V5-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align64:
; DEFAULTSIZE-V5: ; %bb.0: ; %entry
; DEFAULTSIZE-V5-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x8
; DEFAULTSIZE-V5-NEXT: s_add_u32 s0, s0, s9
; DEFAULTSIZE-V5-NEXT: s_addc_u32 s1, s1, 0
; DEFAULTSIZE-V5-NEXT: s_movk_i32 s32, 0x1000
; DEFAULTSIZE-V5-NEXT: s_mov_b32 s33, 0
; DEFAULTSIZE-V5-NEXT: s_waitcnt lgkmcnt(0)
; DEFAULTSIZE-V5-NEXT: s_cmp_lg_u32 s6, 0
; DEFAULTSIZE-V5-NEXT: s_cbranch_scc1 .LBB1_2
; DEFAULTSIZE-V5-NEXT: ; %bb.1: ; %bb.0
; DEFAULTSIZE-V5-NEXT: s_add_i32 s6, s32, 0x1000
; DEFAULTSIZE-V5-NEXT: s_and_b32 s6, s6, 0xfffff000
; DEFAULTSIZE-V5-NEXT: s_lshl_b32 s7, s7, 2
; DEFAULTSIZE-V5-NEXT: s_mov_b32 s32, s6
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v1, 0
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v2, s6
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v3, 1
; DEFAULTSIZE-V5-NEXT: s_add_i32 s6, s6, s7
; DEFAULTSIZE-V5-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
; DEFAULTSIZE-V5-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:4
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v2, s6
; DEFAULTSIZE-V5-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
; DEFAULTSIZE-V5-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0)
; DEFAULTSIZE-V5-NEXT: v_add_u32_e32 v0, v2, v0
; DEFAULTSIZE-V5-NEXT: s_waitcnt lgkmcnt(0)
; DEFAULTSIZE-V5-NEXT: global_store_dword v1, v0, s[4:5]
; DEFAULTSIZE-V5-NEXT: .LBB1_2: ; %bb.1
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v0, 0
; DEFAULTSIZE-V5-NEXT: global_store_dword v[0:1], v0, off
; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0)
; DEFAULTSIZE-V5-NEXT: s_endpgm
;
; FLATSCR-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align64:
; FLATSCR: ; %bb.0: ; %entry
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5
Expand Down Expand Up @@ -279,8 +206,8 @@ bb.1:
ret void
}

; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 64
; DEFAULTSIZE: ; ScratchSize: 64
; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4160
; DEFAULTSIZE: ; ScratchSize: 4160
; DEFAULTSIZE-V5: .amdhsa_private_segment_fixed_size 64
; DEFAULTSIZE-V5: .amdhsa_uses_dynamic_stack 1
; DEFAULTSIZE-V5: ; ScratchSize: 64
Expand Down Expand Up @@ -326,42 +253,6 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
; MUBUF-NEXT: s_mov_b32 s33, s7
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; DEFAULTSIZE-V5-LABEL: func_non_entry_block_static_alloca_align4:
; DEFAULTSIZE-V5: ; %bb.0: ; %entry
; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DEFAULTSIZE-V5-NEXT: s_mov_b32 s7, s33
; DEFAULTSIZE-V5-NEXT: s_mov_b32 s33, s32
; DEFAULTSIZE-V5-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; DEFAULTSIZE-V5-NEXT: s_addk_i32 s32, 0x400
; DEFAULTSIZE-V5-NEXT: s_and_saveexec_b64 s[4:5], vcc
; DEFAULTSIZE-V5-NEXT: s_cbranch_execz .LBB2_3
; DEFAULTSIZE-V5-NEXT: ; %bb.1: ; %bb.0
; DEFAULTSIZE-V5-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; DEFAULTSIZE-V5-NEXT: s_and_b64 exec, exec, vcc
; DEFAULTSIZE-V5-NEXT: s_cbranch_execz .LBB2_3
; DEFAULTSIZE-V5-NEXT: ; %bb.2: ; %bb.1
; DEFAULTSIZE-V5-NEXT: s_add_i32 s6, s32, 0x1000
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v2, 0
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v3, s6
; DEFAULTSIZE-V5-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v2, 1
; DEFAULTSIZE-V5-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen offset:4
; DEFAULTSIZE-V5-NEXT: v_lshl_add_u32 v2, v4, 2, s6
; DEFAULTSIZE-V5-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
; DEFAULTSIZE-V5-NEXT: v_and_b32_e32 v3, 0x3ff, v31
; DEFAULTSIZE-V5-NEXT: s_mov_b32 s32, s6
; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0)
; DEFAULTSIZE-V5-NEXT: v_add_u32_e32 v2, v2, v3
; DEFAULTSIZE-V5-NEXT: global_store_dword v[0:1], v2, off
; DEFAULTSIZE-V5-NEXT: .LBB2_3: ; %bb.2
; DEFAULTSIZE-V5-NEXT: s_or_b64 exec, exec, s[4:5]
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v0, 0
; DEFAULTSIZE-V5-NEXT: global_store_dword v[0:1], v0, off
; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0)
; DEFAULTSIZE-V5-NEXT: s_addk_i32 s32, 0xfc00
; DEFAULTSIZE-V5-NEXT: s_mov_b32 s33, s7
; DEFAULTSIZE-V5-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: func_non_entry_block_static_alloca_align4:
; FLATSCR: ; %bb.0: ; %entry
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -458,40 +349,6 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
; MUBUF-NEXT: s_mov_b32 s33, s7
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; DEFAULTSIZE-V5-LABEL: func_non_entry_block_static_alloca_align64:
; DEFAULTSIZE-V5: ; %bb.0: ; %entry
; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DEFAULTSIZE-V5-NEXT: s_mov_b32 s7, s33
; DEFAULTSIZE-V5-NEXT: s_add_i32 s33, s32, 0xfc0
; DEFAULTSIZE-V5-NEXT: s_and_b32 s33, s33, 0xfffff000
; DEFAULTSIZE-V5-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; DEFAULTSIZE-V5-NEXT: s_addk_i32 s32, 0x2000
; DEFAULTSIZE-V5-NEXT: s_and_saveexec_b64 s[4:5], vcc
; DEFAULTSIZE-V5-NEXT: s_cbranch_execz .LBB3_2
; DEFAULTSIZE-V5-NEXT: ; %bb.1: ; %bb.0
; DEFAULTSIZE-V5-NEXT: s_add_i32 s6, s32, 0x1000
; DEFAULTSIZE-V5-NEXT: s_and_b32 s6, s6, 0xfffff000
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v2, 0
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v4, s6
; DEFAULTSIZE-V5-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v2, 1
; DEFAULTSIZE-V5-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen offset:4
; DEFAULTSIZE-V5-NEXT: v_lshl_add_u32 v2, v3, 2, s6
; DEFAULTSIZE-V5-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
; DEFAULTSIZE-V5-NEXT: v_and_b32_e32 v3, 0x3ff, v31
; DEFAULTSIZE-V5-NEXT: s_mov_b32 s32, s6
; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0)
; DEFAULTSIZE-V5-NEXT: v_add_u32_e32 v2, v2, v3
; DEFAULTSIZE-V5-NEXT: global_store_dword v[0:1], v2, off
; DEFAULTSIZE-V5-NEXT: .LBB3_2: ; %bb.1
; DEFAULTSIZE-V5-NEXT: s_or_b64 exec, exec, s[4:5]
; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v0, 0
; DEFAULTSIZE-V5-NEXT: global_store_dword v[0:1], v0, off
; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0)
; DEFAULTSIZE-V5-NEXT: s_addk_i32 s32, 0xe000
; DEFAULTSIZE-V5-NEXT: s_mov_b32 s33, s7
; DEFAULTSIZE-V5-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: func_non_entry_block_static_alloca_align64:
; FLATSCR: ; %bb.0: ; %entry
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -549,6 +406,3 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; ASSUME1024: {{.*}}
; DEFAULTSIZE: {{.*}}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ declare i32 @foo(ptr addrspace(5)) #0
; ASM: buffer_store_dword
; ASM: buffer_store_dword
; ASM: s_swappc_b64
; ASM: ScratchSize: 16
; ASM: ScratchSize: 16400
define amdgpu_kernel void @call_private(ptr addrspace(1) %out, i32 %in) #0 {
entry:
%tmp = alloca [2 x i32], addrspace(5)
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/recursion.ll
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ define void @tail_recursive_with_stack() {
; For an arbitrary recursive call, report a large number for unknown stack
; usage for code object v4 and older
; CHECK-LABEL: {{^}}calls_recursive:
; CHECK: .amdhsa_private_segment_fixed_size 16{{$}}
; CHECK: .amdhsa_private_segment_fixed_size 16400{{$}}
;
; V5-LABEL: {{^}}calls_recursive:
; V5: .amdhsa_private_segment_fixed_size 0{{$}}
Expand All @@ -56,7 +56,7 @@ define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() {
; in the kernel.

; CHECK-LABEL: {{^}}kernel_calls_tail_recursive:
; CHECK: .amdhsa_private_segment_fixed_size 0{{$}}
; CHECK: .amdhsa_private_segment_fixed_size 16384{{$}}
;
; V5-LABEL: {{^}}kernel_calls_tail_recursive:
; V5: .amdhsa_private_segment_fixed_size 0{{$}}
Expand All @@ -67,7 +67,7 @@ define amdgpu_kernel void @kernel_calls_tail_recursive() {
}

; CHECK-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
; CHECK: .amdhsa_private_segment_fixed_size 8{{$}}
; CHECK: .amdhsa_private_segment_fixed_size 16384{{$}}
;
; V5-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
; V5: .amdhsa_private_segment_fixed_size 8{{$}}
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S --amdhsa-code-object-version=4 -amdgpu-lower-kernel-attributes -instcombine %s | FileCheck -enable-var-scope %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S --amdhsa-code-object-version=4 -passes=amdgpu-lower-kernel-attributes,instcombine %s | FileCheck -enable-var-scope %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -amdgpu-lower-kernel-attributes -instcombine %s | FileCheck -enable-var-scope %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=amdgpu-lower-kernel-attributes,instcombine %s | FileCheck -enable-var-scope %s

target datalayout = "n32"

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ define internal fastcc void @unreachable() {
; GCN: s_endpgm

; GCN: .amdhsa_private_segment_fixed_size 0
; GCN: .amdhsa_uses_dynamic_stack 0
; GCN-NOT: .amdhsa_uses_dynamic_stack 0
; GCN-V5: .amdhsa_uses_dynamic_stack 0
define amdgpu_kernel void @entry() {
bb0:
Expand Down
23 changes: 14 additions & 9 deletions llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,32 @@ define amdgpu_kernel void @kernel() {
; GCN-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; GCN-NEXT: s_mov_b32 s38, -1
; GCN-NEXT: s_mov_b32 s39, 0xe00000
; GCN-NEXT: s_add_u32 s36, s36, s9
; GCN-NEXT: v_writelane_b32 v40, s4, 0
; GCN-NEXT: s_add_u32 s36, s36, s11
; GCN-NEXT: v_writelane_b32 v40, s5, 1
; GCN-NEXT: s_addc_u32 s37, s37, 0
; GCN-NEXT: s_mov_b32 s14, s8
; GCN-NEXT: s_add_u32 s8, s2, 36
; GCN-NEXT: s_addc_u32 s9, s3, 0
; GCN-NEXT: s_mov_b64 s[10:11], s[4:5]
; GCN-NEXT: s_mov_b64 s[4:5], s[0:1]
; GCN-NEXT: v_readlane_b32 s0, v40, 0
; GCN-NEXT: s_mov_b32 s13, s9
; GCN-NEXT: s_mov_b32 s12, s8
; GCN-NEXT: v_readlane_b32 s1, v40, 1
; GCN-NEXT: s_add_u32 s8, s0, 36
; GCN-NEXT: s_addc_u32 s9, s1, 0
; GCN-NEXT: s_getpc_b64 s[0:1]
; GCN-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12
; GCN-NEXT: s_mov_b32 s13, s7
; GCN-NEXT: s_mov_b32 s12, s6
; GCN-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; GCN-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GCN-NEXT: s_mov_b32 s14, s10
; GCN-NEXT: s_mov_b64 s[10:11], s[6:7]
; GCN-NEXT: s_mov_b64 s[6:7], s[2:3]
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GCN-NEXT: s_mov_b64 s[0:1], s[36:37]
; GCN-NEXT: v_or3_b32 v31, v0, v1, v2
; GCN-NEXT: s_mov_b64 s[2:3], s[38:39]
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: s_endpgm
call void @foo()
ret void
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/trap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ declare void @llvm.debugtrap() #1

; MESA-TRAP: .section .AMDGPU.config
; MESA-TRAP: .long 47180
; MESA-TRAP-NEXT: .long 204
; MESA-TRAP-NEXT: .long 208

; NOMESA-TRAP: .section .AMDGPU.config
; NOMESA-TRAP: .long 47180
; NOMESA-TRAP-NEXT: .long 140
; NOMESA-TRAP-NEXT: .long 144

; GCN-LABEL: {{^}}hsa_trap:
; HSA-TRAP: enable_trap_handler = 0
Expand Down
766 changes: 408 additions & 358 deletions llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
Original file line number Diff line number Diff line change
Expand Up @@ -290,14 +290,14 @@ define hidden void @blam() {
; GCN-NEXT: v_writelane_b32 v40, s47, 15
; GCN-NEXT: v_writelane_b32 v40, s48, 16
; GCN-NEXT: v_writelane_b32 v40, s49, 17
; GCN-NEXT: s_mov_b64 s[34:35], s[6:7]
; GCN-NEXT: v_mov_b32_e32 v41, v31
; GCN-NEXT: s_mov_b32 s44, s15
; GCN-NEXT: s_mov_b32 s45, s14
; GCN-NEXT: s_mov_b32 s46, s13
; GCN-NEXT: s_mov_b32 s47, s12
; GCN-NEXT: s_mov_b64 s[36:37], s[10:11]
; GCN-NEXT: s_mov_b64 s[38:39], s[8:9]
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: v_mov_b32_e32 v0, 0
Expand Down Expand Up @@ -354,9 +354,9 @@ define hidden void @blam() {
; GCN-NEXT: ; in Loop: Header=BB1_4 Depth=2
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
; GCN-NEXT: s_mov_b64 s[6:7], s[34:35]
; GCN-NEXT: s_mov_b64 s[8:9], s[38:39]
; GCN-NEXT: s_mov_b64 s[10:11], s[36:37]
; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
; GCN-NEXT: s_mov_b32 s12, s47
; GCN-NEXT: s_mov_b32 s13, s46
; GCN-NEXT: s_mov_b32 s14, s45
Expand Down
2 changes: 1 addition & 1 deletion mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ SerializeToHsacoPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {

// This constant must always match the default code object ABI version
// of the AMDGPU backend.
addControlConstant("__oclc_ABI_version", 500, 32);
addControlConstant("__oclc_ABI_version", 400, 32);
}

// Determine libraries we need to link - order matters due to dependencies
Expand Down