diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index e8ee672b6a8471..90da3390eab324 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1533,6 +1533,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::amdgcn_perm: case Intrinsic::amdgcn_wave_reduce_umin: case Intrinsic::amdgcn_wave_reduce_umax: + case Intrinsic::amdgcn_s_wqm: case Intrinsic::amdgcn_s_quadmask: case Intrinsic::amdgcn_s_bitreplicate: case Intrinsic::arm_mve_vctp8: @@ -2425,6 +2426,15 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, return ConstantFP::get(Ty->getContext(), Val); } + case Intrinsic::amdgcn_s_wqm: { + uint64_t Val = Op->getZExtValue(); + Val |= (Val & 0x5555555555555555ULL) << 1 | + ((Val >> 1) & 0x5555555555555555ULL); + Val |= (Val & 0x3333333333333333ULL) << 2 | + ((Val >> 2) & 0x3333333333333333ULL); + return ConstantInt::get(Ty, Val); + } + case Intrinsic::amdgcn_s_quadmask: { uint64_t Val = Op->getZExtValue(); uint64_t QuadMask = 0; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.ll index 6676dac19ba797..34faa347c2f91a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.ll @@ -9,13 +9,55 @@ define i32 @test_s_wqm_constant_i32() { ; GFX11-LABEL: test_s_wqm_constant_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_wqm_b32 s0, 0x85fe3a92 +; GFX11-NEXT: v_mov_b32_e32 v0, 0xff00ff0f +; GFX11-NEXT: s_setpc_b64 s[30:31] + %br = call i32 @llvm.amdgcn.s.wqm.i32(i32 u0x85003A02) + ret i32 %br +} + +define i32 @test_s_wqm_constant_zero_i32() { +; GFX11-LABEL: test_s_wqm_constant_zero_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %br = call i32 @llvm.amdgcn.s.wqm.i32(i32 0) + ret i32 %br +} + +define i32 @test_s_wqm_constant_neg_one_i32() { +; GFX11-LABEL: test_s_wqm_constant_neg_one_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, -1 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %br = call i32 @llvm.amdgcn.s.wqm.i32(i32 -1) + ret i32 %br +} + +define i32 @test_s_wqm_constant_undef_i32() { +; GFX11-LABEL: test_s_wqm_constant_undef_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_wqm_b32 s0, s0 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %br = call i32 @llvm.amdgcn.s.wqm.i32(i32 undef) + ret i32 %br +} + +define i32 @test_s_wqm_constant_poison_i32() { +; GFX11-LABEL: test_s_wqm_constant_poison_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_wqm_b32 s0, s0 ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %br = call i32 @llvm.amdgcn.s.wqm.i32(i32 u0x85FE3A92) + %br = call i32 @llvm.amdgcn.s.wqm.i32(i32 poison) ret i32 %br } + define amdgpu_cs void @test_s_wqm_sgpr_i32(i32 inreg %mask, ptr addrspace(1) %out) { ; GFX11-LABEL: test_s_wqm_sgpr_i32: ; GFX11: ; %bb.0: ; %entry @@ -48,12 +90,52 @@ define i64 @test_s_wqm_constant_i64() { ; GFX11-LABEL: test_s_wqm_constant_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0x85fe3a92 -; GFX11-NEXT: s_mov_b32 s1, 0x3a9285fe +; GFX11-NEXT: v_mov_b32_e32 v0, 0xff00ffff +; GFX11-NEXT: v_mov_b32_e32 v1, 0xffff0fff +; GFX11-NEXT: s_setpc_b64 s[30:31] + %br = call i64 @llvm.amdgcn.s.wqm.i64(i64 u0x12480FDBAC00753E) + ret i64 %br +} + +define i64 @test_s_wqm_constant_zero_i64() { +; GFX11-LABEL: test_s_wqm_constant_zero_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %br = call i64 @llvm.amdgcn.s.wqm.i64(i64 0) + ret i64 %br +} + +define i64 @test_s_wqm_constant_neg_one_i64() { +; GFX11-LABEL: test_s_wqm_constant_neg_one_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, -1 :: v_dual_mov_b32 v1, -1 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %br = call i64 @llvm.amdgcn.s.wqm.i64(i64 -1) + ret i64 %br +} + +define i64 @test_s_wqm_constant_undef_i64() { +; GFX11-LABEL: test_s_wqm_constant_undef_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_wqm_b64 s[0:1], s[0:1] +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %br = call i64 @llvm.amdgcn.s.wqm.i64(i64 undef) + ret i64 %br +} + +define i64 @test_s_wqm_constant_poison_i64() { +; GFX11-LABEL: test_s_wqm_constant_poison_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_wqm_b64 s[0:1], s[0:1] ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %br = call i64 @llvm.amdgcn.s.wqm.i64(i64 u0x3A9285FE85FE3A92) + %br = call i64 @llvm.amdgcn.s.wqm.i64(i64 poison) ret i64 %br }