From 2985c04ab47476257c99b6b6894cb16cb44992f2 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Wed, 24 Sep 2025 12:23:20 -0700 Subject: [PATCH] [AMDGPU] Add gfx1250 memory lealizer tests run lines. NFC --- .../memory-legalizer-private-lastuse.ll | 48 ++++++++++++++ .../memory-legalizer-private-nontemporal.ll | 62 +++++++++++++++++++ .../memory-legalizer-private-volatile.ll | 53 ++++++++++++++++ 3 files changed, 163 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll index bc905fa564f8a..80ea48be0b893 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s define amdgpu_kernel void @private_last_use_load_0(ptr addrspace(5) %in, ptr addrspace(1) %out) { ; GFX12-LABEL: private_last_use_load_0: @@ -13,6 +14,17 @@ define amdgpu_kernel void @private_last_use_load_0(ptr addrspace(5) %in, ptr add ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX12-NEXT: s_endpgm +; +; GFX1250-LABEL: private_last_use_load_0: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_LU +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm entry: %val = load i32, ptr addrspace(5) %in, align 4, !amdgpu.last.use !{} store i32 %val, ptr addrspace(1) %out @@ -36,6 +48,20 @@ define amdgpu_kernel void @private_last_use_load_1(ptr addrspace(5) %in, ptr add ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX12-NEXT: s_endpgm +; +; GFX1250-LABEL: private_last_use_load_1: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_mov_b32_e32 v1, v0 +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_mov_b32 s3, 0x3ff +; GFX1250-NEXT: v_and_b32_e64 v1, v1, s3 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: scratch_load_b32 v1, v1, s2 scale_offset th:TH_LOAD_LU +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() %val.gep = getelementptr inbounds i32, ptr addrspace(5) %in, i32 %tid @@ -57,6 +83,17 @@ define amdgpu_kernel void @private_last_use_and_volatile_load(ptr addrspace(5) % ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX12-NEXT: s_endpgm +; +; GFX1250-LABEL: private_last_use_and_volatile_load: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_BYPASS scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm entry: %val = load volatile i32, ptr addrspace(5) %in, align 4, !amdgpu.last.use !{} store i32 %val, ptr addrspace(1) %out @@ -74,6 +111,17 @@ define amdgpu_kernel void @private_last_use_and_nontemporal_load(ptr addrspace(5 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX12-NEXT: s_endpgm +; +; GFX1250-LABEL: private_last_use_and_nontemporal_load: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_LU +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm entry: %val = load i32, ptr addrspace(5) %in, align 4, !amdgpu.last.use !{}, !nontemporal !0 store i32 %val, ptr addrspace(1) %out diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll index 2aa4f021c259c..89de17ecbd1e8 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll @@ -12,6 +12,7 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s define amdgpu_kernel void @private_nontemporal_load_0( ; GFX6-LABEL: private_nontemporal_load_0: @@ -201,6 +202,17 @@ define amdgpu_kernel void @private_nontemporal_load_0( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX12-CU-NEXT: s_endpgm +; +; GFX1250-LABEL: private_nontemporal_load_0: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_NT +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm ptr addrspace(5) %in, ptr addrspace(1) %out) { entry: %val = load i32, ptr addrspace(5) %in, align 4, !nontemporal !0 @@ -450,6 +462,20 @@ define amdgpu_kernel void @private_nontemporal_load_1( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX12-CU-NEXT: s_endpgm +; +; GFX1250-LABEL: private_nontemporal_load_1: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_mov_b32_e32 v1, v0 +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_mov_b32 s3, 0x3ff +; GFX1250-NEXT: v_and_b32_e64 v1, v1, s3 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: scratch_load_b32 v1, v1, s2 scale_offset th:TH_LOAD_NT +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm ptr addrspace(5) %in, ptr addrspace(1) %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -627,6 +653,17 @@ define amdgpu_kernel void @private_nontemporal_store_0( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: scratch_store_b32 off, v0, s0 th:TH_STORE_NT ; GFX12-CU-NEXT: s_endpgm +; +; GFX1250-LABEL: private_nontemporal_store_0: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_load_b32 s1, s[2:3], 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b32_e32 v0, s1 +; GFX1250-NEXT: scratch_store_b32 off, v0, s0 th:TH_STORE_NT +; GFX1250-NEXT: s_endpgm ptr addrspace(1) %in, ptr addrspace(5) %out) { entry: %val = load i32, ptr addrspace(1) %in, align 4 @@ -846,6 +883,20 @@ define amdgpu_kernel void @private_nontemporal_store_1( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: scratch_store_b32 v1, v0, s0 th:TH_STORE_NT ; GFX12-CU-NEXT: s_endpgm +; +; GFX1250-LABEL: private_nontemporal_store_1: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_load_b32 s1, s[2:3], 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_mov_b32 s2, 0x3ff +; GFX1250-NEXT: v_and_b32_e64 v1, v0, s2 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b32_e32 v0, s1 +; GFX1250-NEXT: scratch_store_b32 v1, v0, s0 scale_offset th:TH_STORE_NT +; GFX1250-NEXT: s_endpgm ptr addrspace(1) %in, ptr addrspace(5) %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1047,6 +1098,17 @@ define amdgpu_kernel void @private_nontemporal_volatile_load( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX12-CU-NEXT: s_endpgm +; +; GFX1250-LABEL: private_nontemporal_volatile_load: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_NT scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm ptr addrspace(5) %in, ptr addrspace(1) %out) { entry: %val = load volatile i32, ptr addrspace(5) %in, align 4, !nontemporal !0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll index df4193969f8a0..7faa0621aa6d0 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll @@ -8,6 +8,7 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s define amdgpu_kernel void @private_volatile_load_0( ; GFX6-LABEL: private_volatile_load_0: @@ -155,6 +156,17 @@ define amdgpu_kernel void @private_volatile_load_0( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX12-CU-NEXT: s_endpgm +; +; GFX1250-LABEL: private_volatile_load_0: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: scratch_load_b32 v1, off, s2 scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm ptr addrspace(5) %in, ptr addrspace(1) %out) { entry: %val = load volatile i32, ptr addrspace(5) %in, align 4 @@ -340,6 +352,20 @@ define amdgpu_kernel void @private_volatile_load_1( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX12-CU-NEXT: s_endpgm +; +; GFX1250-LABEL: private_volatile_load_1: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_mov_b32_e32 v1, v0 +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_mov_b32 s3, 0x3ff +; GFX1250-NEXT: v_and_b32_e64 v1, v1, s3 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: scratch_load_b32 v1, v1, s2 scale_offset scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm ptr addrspace(5) %in, ptr addrspace(1) %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -490,6 +516,18 @@ define amdgpu_kernel void @private_volatile_store_0( ; GFX12-CU-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm +; +; GFX1250-LABEL: private_volatile_store_0: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_load_b32 s1, s[2:3], 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b32_e32 v0, s1 +; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_endpgm ptr addrspace(1) %in, ptr addrspace(5) %out) { entry: %val = load i32, ptr addrspace(1) %in, align 4 @@ -664,6 +702,21 @@ define amdgpu_kernel void @private_volatile_store_1( ; GFX12-CU-NEXT: scratch_store_b32 v1, v0, s0 scope:SCOPE_SYS ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_endpgm +; +; GFX1250-LABEL: private_volatile_store_1: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_load_b32 s1, s[2:3], 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_mov_b32 s2, 0x3ff +; GFX1250-NEXT: v_and_b32_e64 v1, v0, s2 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b32_e32 v0, s1 +; GFX1250-NEXT: scratch_store_b32 v1, v0, s0 scale_offset scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_endpgm ptr addrspace(1) %in, ptr addrspace(5) %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x()