-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU][NFC] Test autogenerated llc tests for COV5 #74339
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Saiyedul Islam (saiislam) Changes: Regenerate a few llc tests to test for COV5 instead of the default ABI version. Patch is 847.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/74339.diff 22 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
index c7bc584d42d5a..121656b6a30f2 100644
--- a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
+++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
@@ -276,16 +276,16 @@ define void @addrspacecast_requires_queue_ptr(ptr addrspace(5) %ptr.private, ptr
; FIXEDABI-SDAG-LABEL: addrspacecast_requires_queue_ptr:
; FIXEDABI-SDAG: ; %bb.0:
; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FIXEDABI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x40
+; FIXEDABI-SDAG-NEXT: s_mov_b64 s[4:5], 0
+; FIXEDABI-SDAG-NEXT: s_load_dword s4, s[4:5], 0x0
; FIXEDABI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
-; FIXEDABI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v2, s5
-; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc
-; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v0, s4
-; FIXEDABI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1
-; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v0, vcc
; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v0, 1
+; FIXEDABI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v4, s4
+; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
+; FIXEDABI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1
+; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc
; FIXEDABI-SDAG-NEXT: flat_store_dword v[2:3], v0
; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0)
@@ -297,12 +297,15 @@ define void @addrspacecast_requires_queue_ptr(ptr addrspace(5) %ptr.private, ptr
; FIXEDABI-GISEL-LABEL: addrspacecast_requires_queue_ptr:
; FIXEDABI-GISEL: ; %bb.0:
; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FIXEDABI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x40
+; FIXEDABI-GISEL-NEXT: s_mov_b64 s[4:5], 0xc0
+; FIXEDABI-GISEL-NEXT: s_load_dword s6, s[4:5], 0x0
+; FIXEDABI-GISEL-NEXT: s_mov_b64 s[4:5], 0xc4
+; FIXEDABI-GISEL-NEXT: s_load_dword s4, s[4:5], 0x0
; FIXEDABI-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc
; FIXEDABI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v0, s5
-; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v0, vcc
+; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v3, s6
+; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v4, s4
; FIXEDABI-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1
; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
@@ -322,16 +325,29 @@ define void @addrspacecast_requires_queue_ptr(ptr addrspace(5) %ptr.private, ptr
}
define void @is_shared_requires_queue_ptr(ptr %ptr) #0 {
-; FIXEDABI-LABEL: is_shared_requires_queue_ptr:
-; FIXEDABI: ; %bb.0:
-; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FIXEDABI-NEXT: s_load_dword s4, s[6:7], 0x40
-; FIXEDABI-NEXT: s_waitcnt lgkmcnt(0)
-; FIXEDABI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1
-; FIXEDABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; FIXEDABI-NEXT: flat_store_dword v[0:1], v0
-; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
-; FIXEDABI-NEXT: s_setpc_b64 s[30:31]
+; FIXEDABI-SDAG-LABEL: is_shared_requires_queue_ptr:
+; FIXEDABI-SDAG: ; %bb.0:
+; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT: s_mov_b64 s[4:5], 0
+; FIXEDABI-SDAG-NEXT: s_load_dword s4, s[4:5], 0x0
+; FIXEDABI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1
+; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; FIXEDABI-SDAG-NEXT: flat_store_dword v[0:1], v0
+; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; FIXEDABI-GISEL-LABEL: is_shared_requires_queue_ptr:
+; FIXEDABI-GISEL: ; %bb.0:
+; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT: s_mov_b64 s[4:5], 0xc4
+; FIXEDABI-GISEL-NEXT: s_load_dword s4, s[4:5], 0x0
+; FIXEDABI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1
+; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; FIXEDABI-GISEL-NEXT: flat_store_dword v[0:1], v0
+; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-GISEL-NEXT: s_setpc_b64 s[30:31]
%is.shared = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
%zext = zext i1 %is.shared to i32
store volatile i32 %zext, ptr addrspace(1) undef
@@ -339,16 +355,29 @@ define void @is_shared_requires_queue_ptr(ptr %ptr) #0 {
}
define void @is_private_requires_queue_ptr(ptr %ptr) #0 {
-; FIXEDABI-LABEL: is_private_requires_queue_ptr:
-; FIXEDABI: ; %bb.0:
-; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FIXEDABI-NEXT: s_load_dword s4, s[6:7], 0x44
-; FIXEDABI-NEXT: s_waitcnt lgkmcnt(0)
-; FIXEDABI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1
-; FIXEDABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; FIXEDABI-NEXT: flat_store_dword v[0:1], v0
-; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
-; FIXEDABI-NEXT: s_setpc_b64 s[30:31]
+; FIXEDABI-SDAG-LABEL: is_private_requires_queue_ptr:
+; FIXEDABI-SDAG: ; %bb.0:
+; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT: s_mov_b64 s[4:5], 0
+; FIXEDABI-SDAG-NEXT: s_load_dword s4, s[4:5], 0x0
+; FIXEDABI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1
+; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; FIXEDABI-SDAG-NEXT: flat_store_dword v[0:1], v0
+; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; FIXEDABI-GISEL-LABEL: is_private_requires_queue_ptr:
+; FIXEDABI-GISEL: ; %bb.0:
+; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT: s_mov_b64 s[4:5], 0xc0
+; FIXEDABI-GISEL-NEXT: s_load_dword s4, s[4:5], 0x0
+; FIXEDABI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1
+; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; FIXEDABI-GISEL-NEXT: flat_store_dword v[0:1], v0
+; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-GISEL-NEXT: s_setpc_b64 s[30:31]
%is.private = call i1 @llvm.amdgcn.is.private(ptr %ptr)
%zext = zext i1 %is.private to i32
store volatile i32 %zext, ptr addrspace(1) undef
@@ -356,11 +385,21 @@ define void @is_private_requires_queue_ptr(ptr %ptr) #0 {
}
define void @trap_requires_queue() #0 {
-; FIXEDABI-LABEL: trap_requires_queue:
-; FIXEDABI: ; %bb.0:
-; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FIXEDABI-NEXT: s_mov_b64 s[0:1], s[6:7]
-; FIXEDABI-NEXT: s_trap 2
+; FIXEDABI-SDAG-LABEL: trap_requires_queue:
+; FIXEDABI-SDAG: ; %bb.0:
+; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT: s_mov_b64 s[4:5], 0
+; FIXEDABI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; FIXEDABI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT: s_trap 2
+;
+; FIXEDABI-GISEL-LABEL: trap_requires_queue:
+; FIXEDABI-GISEL: ; %bb.0:
+; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT: s_mov_b64 s[4:5], 0xc8
+; FIXEDABI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; FIXEDABI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT: s_trap 2
call void @llvm.trap()
unreachable
}
@@ -390,3 +429,6 @@ declare void @llvm.trap()
declare void @llvm.debugtrap()
attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll
index 2d60faaf8c8c0..e962df69b45c1 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll
@@ -21,16 +21,29 @@ define ptr addrspace(1) @flat_to_gobal_addrspacecast(ptr %ptr) {
}
define ptr @group_to_flat_addrspacecast(ptr addrspace(3) %ptr) {
-; CHECK-LABEL: group_to_flat_addrspacecast:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_load_dword s4, s[6:7], 0x10
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
-; CHECK-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: v_mov_b32_e32 v1, s4
-; CHECK-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; CHECK-NEXT: s_setpc_b64 s[30:31]
+; SDAG-LABEL: group_to_flat_addrspacecast:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_mov_b64 s[4:5], 0
+; SDAG-NEXT: s_load_dword s4, s[4:5], 0x0
+; SDAG-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
+; SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-NEXT: v_mov_b32_e32 v1, s4
+; SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: group_to_flat_addrspacecast:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_mov_b64 s[4:5], 0xc4
+; GISEL-NEXT: s_load_dword s4, s[4:5], 0x0
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v1, s4
+; GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GISEL-NEXT: s_setpc_b64 s[30:31]
%stof = addrspacecast ptr addrspace(3) %ptr to ptr
ret ptr %stof
}
@@ -47,16 +60,29 @@ define ptr addrspace(3) @flat_to_group_addrspacecast(ptr %ptr) {
}
define ptr @private_to_flat_addrspacecast(ptr addrspace(5) %ptr) {
-; CHECK-LABEL: private_to_flat_addrspacecast:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_load_dword s4, s[6:7], 0x11
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
-; CHECK-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: v_mov_b32_e32 v1, s4
-; CHECK-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; CHECK-NEXT: s_setpc_b64 s[30:31]
+; SDAG-LABEL: private_to_flat_addrspacecast:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_mov_b64 s[4:5], 0
+; SDAG-NEXT: s_load_dword s4, s[4:5], 0x0
+; SDAG-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
+; SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-NEXT: v_mov_b32_e32 v1, s4
+; SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: private_to_flat_addrspacecast:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_mov_b64 s[4:5], 0xc0
+; GISEL-NEXT: s_load_dword s4, s[4:5], 0x0
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v1, s4
+; GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GISEL-NEXT: s_setpc_b64 s[30:31]
%stof = addrspacecast ptr addrspace(5) %ptr to ptr
ret ptr %stof
}
@@ -204,3 +230,6 @@ define ptr addrspace(6) @addrspacecast_flat_null_to_constant32bit() {
}
attributes #0 = { "amdgpu-32bit-address-high-bits"="0xffff8000" }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
index 8c64ab5952def..e53fa9650f9ec 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
@@ -146,13 +146,13 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v41, v31
+; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7]
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
-; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
; CHECK-NEXT: v_mov_b32_e32 v42, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -163,9 +163,9 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
@@ -285,13 +285,13 @@ define double @test_powr_fast_f64(double %x, double %y) {
; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v43, v31
+; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7]
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
-; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
; CHECK-NEXT: v_mov_b32_e32 v42, v3
; CHECK-NEXT: v_mov_b32_e32 v41, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
@@ -302,9 +302,9 @@ define double @test_powr_fast_f64(double %x, double %y) {
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
@@ -430,13 +430,13 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v41, v31
+; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7]
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
-; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
; CHECK-NEXT: v_mov_b32_e32 v42, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -447,9 +447,9 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
@@ -571,13 +571,13 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v41, v31
+; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7]
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
-; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
; CHECK-NEXT: v_lshlrev_b32_e32 v42, 1, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -588,9 +588,9 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
@@ -715,13 +715,13 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v41, v31
+; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7]
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
-; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
; CHECK-NEXT: v_or_b32_e32 v43, 1, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -732,9 +732,9 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[34:35]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
@@ -773,3 +773,6 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
%call = tail call fast double @_Z4powndi(double %x, i32 %y)
ret double %call
}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index 1f90c0d03a856..1396dab69c13a 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -458,7 +458,7 @@ define void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) #1 {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR8]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr addrspace(4)
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(4) [[STOF]], align 4
; ATTRIBUTOR_HSA-NEXT: ret void
@@ -477,7 +477,7 @@ define void @use_group_to_flat_addrspacecast_gfx9(ptr addrspace(3) %ptr) #2 {
; AKF_HSA-NEXT: ret void
;
; ATTR...
[truncated]
|
I have only added the following module flag to these tests and regenerated them using
|
; FIXEDABI-GISEL-LABEL: trap_requires_queue: | ||
; FIXEDABI-GISEL: ; %bb.0: | ||
; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; FIXEDABI-GISEL-NEXT: s_mov_b64 s[4:5], 0xc8 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is highly suspicious, the globalisel path is using a different value. Is globalisel somehow not respecting the CO version?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The change itself seems fine (though the new metadata spam is a bit annoying, and should eventually be removed).
The DAG vs. GlobalISel divergence in some of these cases is concerning and shouldn't be happening
The queue_ptr offset was incorrectly dropped for SDAG. Can you include the following fix? Thanks. diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
|
Regenerate a few llc tests to test for COV5 instead of the default ABI version.
You can test this locally with the following command:git-clang-format --diff 8f6f5ec77615e2ae137d0b1e306abbac6f7fc0e8 a16ede8772381ec3e7500d0a8234198e283a22cf -- llvm/lib/Target/AMDGPU/SIISelLowering.cpp View the diff from clang-format here.diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 7ac36d4b02..dd71a96173 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1343,15 +1343,15 @@ bool SITargetLowering::isLegalGlobalAddressingMode(const AddrMode &AM) const {
SIInstrFlags::FlatGlobal));
if (!Subtarget->hasAddr64() || Subtarget->useFlatForGlobal()) {
- // Assume the we will use FLAT for all global memory accesses
- // on VI.
- // FIXME: This assumption is currently wrong. On VI we still use
- // MUBUF instructions for the r + i addressing mode. As currently
- // implemented, the MUBUF instructions only work on buffer < 4GB.
- // It may be possible to support > 4GB buffers with MUBUF instructions,
- // by setting the stride value in the resource descriptor which would
- // increase the size limit to (stride * 4GB). However, this is risky,
- // because it has never been validated.
+ // Assume the we will use FLAT for all global memory accesses
+ // on VI.
+ // FIXME: This assumption is currently wrong. On VI we still use
+ // MUBUF instructions for the r + i addressing mode. As currently
+ // implemented, the MUBUF instructions only work on buffer < 4GB.
+ // It may be possible to support > 4GB buffers with MUBUF instructions,
+ // by setting the stride value in the resource descriptor which would
+ // increase the size limit to (stride * 4GB). However, this is risky,
+ // because it has never been validated.
return isLegalFlatAddressingMode(AM);
}
|
Thank you @changpeng. I have incorporated your change and regenerated all the tests. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Regenerate a few llc tests to test for COV5 instead of the default ABI version.