diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 51c56ecea2c96..913f49503660f 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1684,6 +1684,15 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
     }
   }
 
+  if (Opc == AMDGPU::V_ADD_U32_e64 || Opc == AMDGPU::V_ADD_U32_e32) {
+    if (Src1Val == 0) {
+      // y = add x, 0 -> y = copy x
+      MI->removeOperand(Src1Idx);
+      mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
+      return true;
+    }
+  }
+
   return false;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll b/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll
new file mode 100644
index 0000000000000..e52eb8aca9f84
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN %s
+
+@global_smem = external addrspace(3) global [0 x i8]
+
+define amdgpu_kernel void @addzero() {
+; GCN-LABEL: addzero:
+; GCN:       ; %bb.0: ; %.lr.ph
+; GCN-NEXT:    v_mov_b32_e32 v2, 0
+; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
+; GCN-NEXT:    v_mov_b32_e32 v3, v2
+; GCN-NEXT:    ds_write_b64 v0, v[2:3]
+; GCN-NEXT:    s_endpgm
+.lr.ph:
+  %0 = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %1 = and i32 %0, 1
+  %2 = getelementptr i8, ptr addrspace(3) @global_smem, i32 %1
+  store <4 x bfloat> zeroinitializer, ptr addrspace(3) %2, align 8
+  ret void
+}