Skip to content

Commit

Permalink
[AMDGPU] Fix error in #88512. (#92770)
Browse files Browse the repository at this point in the history
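Fixes an error in the GlobalISel CTLZ lowering introduced by
[#88512](#88512): in `legalizeCTLZ_ZERO_UNDEF`, the any-extended source is now shifted left (`G_SHL`) instead of logically right (`G_LSHR`) before `G_AMDGPU_FFBH_U32` is applied.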

---------

Co-authored-by: Leon Clark <leoclark@amd.com>
  • Loading branch information
PeddleSpam and Leon Clark committed May 20, 2024
1 parent 1eb7f05 commit e1c06c3
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 37 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4168,7 +4168,7 @@ bool AMDGPULegalizerInfo::legalizeCTLZ_ZERO_UNDEF(MachineInstr &MI,

auto ShiftAmt = B.buildConstant(S32, 32u - NumBits);
auto Extend = B.buildAnyExt(S32, {Src}).getReg(0u);
-auto Shift = B.buildLShr(S32, {Extend}, ShiftAmt);
+auto Shift = B.buildShl(S32, Extend, ShiftAmt);
auto Ctlz = B.buildInstr(AMDGPU::G_AMDGPU_FFBH_U32, {S32}, {Shift});
B.buildTrunc(Dst, Ctlz);
MI.eraseFromParent();
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
; CHECK-NEXT: [[AMDGPU_FFBH_U32:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[LSHR]](s32)
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
; CHECK-NEXT: [[AMDGPU_FFBH_U32:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_FFBH_U32]], [[C1]]
; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
Expand Down Expand Up @@ -147,10 +147,10 @@ body: |
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[AMDGPU_FFBH_U32:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[LSHR1]](s32)
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[C]](s32)
; CHECK-NEXT: [[AMDGPU_FFBH_U321:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[LSHR2]](s32)
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[AMDGPU_FFBH_U32:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL]](s32)
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
; CHECK-NEXT: [[AMDGPU_FFBH_U321:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL2]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_FFBH_U32]], [[C1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_FFBH_U321]], [[C1]]
Expand All @@ -175,8 +175,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
; CHECK-NEXT: [[FFBH:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[LSHR]](s32)
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
; CHECK-NEXT: [[FFBH:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FFBH]], [[C1]]
; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
Expand Down
76 changes: 48 additions & 28 deletions llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i8_with_select(ptr addrspace(1) noa
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT: s_lshr_b32 s0, s4, 24
; GFX9-GISEL-NEXT: s_lshl_b32 s0, s4, 24
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s0
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[2:3]
Expand Down Expand Up @@ -452,7 +452,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i16_with_select(ptr addrspace(1) no
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT: s_lshr_b32 s0, s4, 16
; GFX9-GISEL-NEXT: s_lshl_b32 s0, s4, 16
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s0
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT: global_store_short v1, v0, s[2:3]
Expand Down Expand Up @@ -655,7 +655,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_with_select(ptr addrspace(1) noa
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT: global_load_ubyte v1, v0, s[2:3]
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v2, v1
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v1
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 32, v2, vcc
Expand Down Expand Up @@ -760,7 +761,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i16_with_select(ptr addrspace(1) no
; GFX9-GISEL-NEXT: global_load_ubyte v2, v0, s[2:3] offset:1
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v2, 8, v1
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v2, v1
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 32, v2, vcc
Expand Down Expand Up @@ -1167,7 +1169,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8(ptr addrspace(1) noalias %out, p
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 24, v0
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
Expand Down Expand Up @@ -1705,8 +1708,9 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(ptr addrspace(1) noa
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v2, v0
; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa s[2:3], v0, v1
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v0
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa s[2:3], v0, v1 src0_sel:BYTE_0 src1_sel:DWORD
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, -1, s[2:3]
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
Expand Down Expand Up @@ -2186,7 +2190,7 @@ define i7 @v_ctlz_zero_undef_i7(i7 %val) {
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i7:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 25, v0
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 25, v0
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
%ctlz = call i7 @llvm.ctlz.i7(i7 %val, i1 true)
Expand Down Expand Up @@ -2278,7 +2282,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i18(ptr addrspace(1) noalias %out,
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT: s_lshr_b32 s0, s4, 14
; GFX9-GISEL-NEXT: s_lshl_b32 s0, s4, 14
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s0
; GFX9-GISEL-NEXT: s_and_b32 s0, s0, 0x3ffff
; GFX9-GISEL-NEXT: s_lshr_b32 s1, s0, 16
Expand Down Expand Up @@ -2317,7 +2321,7 @@ define i18 @v_ctlz_zero_undef_i18(i18 %val) {
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i18:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 14, v0
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 14, v0
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
%ctlz = call i18 @llvm.ctlz.i18(i18 %val, i1 true)
Expand Down Expand Up @@ -2355,8 +2359,8 @@ define <2 x i18> @v_ctlz_zero_undef_v2i18(<2 x i18> %val) {
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i18:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 14, v0
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 14, v1
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 14, v0
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 14, v1
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
Expand Down Expand Up @@ -2394,10 +2398,13 @@ define <2 x i16> @v_ctlz_zero_undef_v2i16(<2 x i16> %val) {
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i16:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4, 16, v0
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
%ctlz = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> %val, i1 true)
ret <2 x i16> %ctlz
Expand Down Expand Up @@ -2439,11 +2446,15 @@ define <3 x i16> @v_ctlz_zero_undef_v3i16(<3 x i16> %val) {
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v3i16:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4, 16, v0
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
%ctlz = call <3 x i16> @llvm.ctlz.v3i16(<3 x i16> %val, i1 true)
ret <3 x i16> %ctlz
Expand Down Expand Up @@ -2492,13 +2503,20 @@ define <4 x i16> @v_ctlz_zero_undef_v4i16(<4 x i16> %val) {
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v4i16:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v3, v3
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4, 16, v0
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, s4, 16, v1
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
%ctlz = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %val, i1 true)
ret <4 x i16> %ctlz
Expand Down Expand Up @@ -2536,8 +2554,10 @@ define <2 x i8> @v_ctlz_zero_undef_v2i8(<2 x i8> %val) {
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i8:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3
; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 24, v0
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v1
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
%ctlz = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %val, i1 true)
ret <2 x i8> %ctlz
Expand Down Expand Up @@ -2579,8 +2599,8 @@ define <2 x i7> @v_ctlz_zero_undef_v2i7(<2 x i7> %val) {
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i7:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 25, v0
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 25, v1
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 25, v0
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 25, v1
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
Expand Down

0 comments on commit e1c06c3

Please sign in to comment.