From 0ba764985d74ed02e1c0eda6d7436d50ab3a01c3 Mon Sep 17 00:00:00 2001 From: Leon Clark Date: Thu, 20 Nov 2025 12:35:56 +0000 Subject: [PATCH 1/6] Propagate AA info in vector load/store splitting. --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 19b3ae5e695c7..5b24166b1a8be 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1890,12 +1890,14 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op, SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT, Load->getChain(), BasePtr, SrcValue, LoMemVT, - BaseAlign, Load->getMemOperand()->getFlags()); + BaseAlign, Load->getMemOperand()->getFlags(), + Load->getAAInfo()); SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Size)); SDValue HiLoad = DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(), HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()), - HiMemVT, HiAlign, Load->getMemOperand()->getFlags()); + HiMemVT, HiAlign, Load->getMemOperand()->getFlags(), + Load->getAAInfo()); SDValue Join; if (LoVT == HiVT) { @@ -1983,10 +1985,11 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, SDValue LoStore = DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign, - Store->getMemOperand()->getFlags()); + Store->getMemOperand()->getFlags(), Store->getAAInfo()); SDValue HiStore = DAG.getTruncStore(Chain, SL, Hi, HiPtr, SrcValue.getWithOffset(Size), - HiMemVT, HiAlign, Store->getMemOperand()->getFlags()); + HiMemVT, HiAlign, Store->getMemOperand()->getFlags(), + Store->getAAInfo()); return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore); } From adc94c3b01abd2c9fefebb77c7e0b507bd8c4a5b Mon Sep 17 00:00:00 2001 From: Leon Clark Date: Thu, 20 Nov 2025 13:04:12 +0000 Subject: [PATCH 2/6] Formatting. --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 5b24166b1a8be..971dfdbe3e70a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1888,16 +1888,14 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op, Align BaseAlign = Load->getAlign(); Align HiAlign = commonAlignment(BaseAlign, Size); - SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT, - Load->getChain(), BasePtr, SrcValue, LoMemVT, - BaseAlign, Load->getMemOperand()->getFlags(), - Load->getAAInfo()); + SDValue LoLoad = DAG.getExtLoad( + Load->getExtensionType(), SL, LoVT, Load->getChain(), BasePtr, SrcValue, + LoMemVT, BaseAlign, Load->getMemOperand()->getFlags(), Load->getAAInfo()); SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Size)); - SDValue HiLoad = - DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(), - HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()), - HiMemVT, HiAlign, Load->getMemOperand()->getFlags(), - Load->getAAInfo()); + SDValue HiLoad = DAG.getExtLoad( + Load->getExtensionType(), SL, HiVT, Load->getChain(), HiPtr, + SrcValue.getWithOffset(LoMemVT.getStoreSize()), HiMemVT, HiAlign, + Load->getMemOperand()->getFlags(), Load->getAAInfo()); SDValue Join; if (LoVT == HiVT) { @@ -1986,10 +1984,9 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, SDValue LoStore = DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign, Store->getMemOperand()->getFlags(), Store->getAAInfo()); - SDValue HiStore = - DAG.getTruncStore(Chain, SL, Hi, HiPtr, SrcValue.getWithOffset(Size), - HiMemVT, HiAlign, Store->getMemOperand()->getFlags(), - Store->getAAInfo()); + SDValue HiStore = DAG.getTruncStore( + Chain, SL, Hi, HiPtr, SrcValue.getWithOffset(Size), HiMemVT, HiAlign, + Store->getMemOperand()->getFlags(), Store->getAAInfo()); return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore); } From fe0995e1995b8b1d1a8bae8941e2979d3120a82a Mon Sep 17 00:00:00 2001 From: Leon Clark Date: Fri, 21 Nov 2025 15:37:19 +0000 Subject: [PATCH 3/6] Add tests. --- .../AMDGPU/si-split-load-store-alias-info.ll | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/si-split-load-store-alias-info.ll diff --git a/llvm/test/CodeGen/AMDGPU/si-split-load-store-alias-info.ll b/llvm/test/CodeGen/AMDGPU/si-split-load-store-alias-info.ll new file mode 100644 index 0000000000000..90edb30f1a98c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-split-load-store-alias-info.ll @@ -0,0 +1,26 @@ +; RUN: llc -mtriple=amdgcn -stop-after finalize-isel < %s | FileCheck %s + +; This test verifies that instruction selection will propagate alias metadata +; to split loads and stores. + +; CHECK: %{{[0-9]+}}:vreg_64 = DS_READ_B64 {{.*}} :: (load (s64) from %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) +; CHECK: DS_WRITE_B64 {{.*}} :: (store (s64) into %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) + +define amdgpu_kernel void @test(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) { + %idx = call i32 @llvm.amdgcn.workitem.id.x() + %in.addr = getelementptr <16 x float>, ptr addrspace(3) %in, i32 %idx + %val.0 = load <16 x float>, ptr addrspace(3) %in.addr, align 32, !alias.scope !4, !noalias !5 + %val.1 = call <16 x float> @llvm.amdgcn.wmma.f32.16x16x16.f32.v16f32.v16f32(<16 x float> %val.0, <16 x float> %val.0, <16 x float> %val.0, i1 false) + %out.addr = getelementptr <16 x float>, ptr addrspace(3) %out, i32 %idx + store <16 x float> %val.1, ptr addrspace(3) %out.addr, align 32, !alias.scope !5, !noalias !4 + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() +declare <16 x float> @llvm.amdgcn.wmma.f32.16x16x16.f32.v16f32.v16f32(<16 x float>, <16 x float>, <16 x float>, i1 immarg) + +!0 = !{!"inout.domain"} +!1 = !{!"in.scope", !0} +!2 = !{!"out.scope", !0} +!4 = !{!1} +!5 = !{!2} From 69c4e7e92fbf72f87e54c8dc53ba88704dafa733 Mon Sep 17 00:00:00 2001 From: Leon Clark Date: Fri, 21 Nov 2025 16:08:13 +0000 Subject: [PATCH 4/6] Address review comments. --- llvm/test/CodeGen/AMDGPU/si-split-load-store-alias-info.ll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/si-split-load-store-alias-info.ll b/llvm/test/CodeGen/AMDGPU/si-split-load-store-alias-info.ll index 90edb30f1a98c..e51d6eb2a17d5 100644 --- a/llvm/test/CodeGen/AMDGPU/si-split-load-store-alias-info.ll +++ b/llvm/test/CodeGen/AMDGPU/si-split-load-store-alias-info.ll @@ -1,10 +1,10 @@ -; RUN: llc -mtriple=amdgcn -stop-after finalize-isel < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -stop-after=finalize-isel < %s | FileCheck %s ; This test verifies that instruction selection will propagate alias metadata ; to split loads and stores. -; CHECK: %{{[0-9]+}}:vreg_64 = DS_READ_B64 {{.*}} :: (load (s64) from %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) -; CHECK: DS_WRITE_B64 {{.*}} :: (store (s64) into %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) +; CHECK: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, align 32, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) +; CHECK: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) define amdgpu_kernel void @test(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) { %idx = call i32 @llvm.amdgcn.workitem.id.x() From b88af22a31941d452ba369bb96e44e606dc304e7 Mon Sep 17 00:00:00 2001 From: Leon Clark Date: Fri, 21 Nov 2025 16:21:38 +0000 Subject: [PATCH 5/6] Address review comments. --- .../AMDGPU/si-split-load-store-alias-info.ll | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/si-split-load-store-alias-info.ll b/llvm/test/CodeGen/AMDGPU/si-split-load-store-alias-info.ll index e51d6eb2a17d5..cd1638ebe011c 100644 --- a/llvm/test/CodeGen/AMDGPU/si-split-load-store-alias-info.ll +++ b/llvm/test/CodeGen/AMDGPU/si-split-load-store-alias-info.ll @@ -3,8 +3,17 @@ ; This test verifies that instruction selection will propagate alias metadata ; to split loads and stores. -; CHECK: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, align 32, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) -; CHECK: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) +; CHECK: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, align 32, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) +; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) +; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, align 32, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) +; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) +; CHECK: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) +; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE +; CHECK-NEXT: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) +; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE +; CHECK-NEXT: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) +; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE +; CHECK-NEXT: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) define amdgpu_kernel void @test(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) { %idx = call i32 @llvm.amdgcn.workitem.id.x() From e5b508dcae84c2fa0b30be4ad43a50101baa5ccf Mon Sep 17 00:00:00 2001 From: Leon Clark Date: Fri, 21 Nov 2025 16:43:15 +0000 Subject: [PATCH 6/6] Address review comments. --- .../AMDGPU/si-split-load-store-alias-info.ll | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/si-split-load-store-alias-info.ll b/llvm/test/CodeGen/AMDGPU/si-split-load-store-alias-info.ll index cd1638ebe011c..fa1710fe8b6f8 100644 --- a/llvm/test/CodeGen/AMDGPU/si-split-load-store-alias-info.ll +++ b/llvm/test/CodeGen/AMDGPU/si-split-load-store-alias-info.ll @@ -3,17 +3,17 @@ ; This test verifies that instruction selection will propagate alias metadata ; to split loads and stores. -; CHECK: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, align 32, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) -; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) -; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, align 32, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) -; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) -; CHECK: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) +; CHECK: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, align 32, !alias.scope ![[IN:[0-9]+]], !noalias ![[OUT:[0-9]+]], addrspace 3) +; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, !alias.scope ![[IN]], !noalias ![[OUT]], addrspace 3) +; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, align 32, !alias.scope ![[IN]], !noalias ![[OUT]], addrspace 3) +; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, !alias.scope ![[IN]], !noalias ![[OUT]], addrspace 3) +; CHECK: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope ![[OUT]], !noalias ![[IN]], addrspace 3) ; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE -; CHECK-NEXT: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) +; CHECK-NEXT: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope ![[OUT]], !noalias ![[IN]], addrspace 3) ; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE -; CHECK-NEXT: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) +; CHECK-NEXT: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope ![[OUT]], !noalias ![[IN]], addrspace 3) ; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE -; CHECK-NEXT: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope !{{[0-9]+}}, !noalias !{{[0-9]+}}, addrspace 3) +; CHECK-NEXT: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope ![[OUT]], !noalias ![[IN]], addrspace 3) define amdgpu_kernel void @test(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) { %idx = call i32 @llvm.amdgcn.workitem.id.x()