Skip to content

Commit

Permalink
AMDGPU: Move R600 test compatability hack
Browse files Browse the repository at this point in the history
Instead of handling the r600 intrinsics on amdgcn, handle the amdgcn
intrinsics on r600.
  • Loading branch information
arsenm committed Feb 10, 2020
1 parent f319074 commit 7af7b96
Show file tree
Hide file tree
Showing 28 changed files with 169 additions and 285 deletions.
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
Expand Up @@ -615,21 +615,27 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
return LowerImplicitParameter(DAG, VT, DL, 8);

case Intrinsic::r600_read_tgid_x:
case Intrinsic::amdgcn_workgroup_id_x:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T1_X, VT);
case Intrinsic::r600_read_tgid_y:
case Intrinsic::amdgcn_workgroup_id_y:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T1_Y, VT);
case Intrinsic::r600_read_tgid_z:
case Intrinsic::amdgcn_workgroup_id_z:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T1_Z, VT);
case Intrinsic::r600_read_tidig_x:
case Intrinsic::amdgcn_workitem_id_x:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T0_X, VT);
case Intrinsic::r600_read_tidig_y:
case Intrinsic::amdgcn_workitem_id_y:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T0_Y, VT);
case Intrinsic::r600_read_tidig_z:
case Intrinsic::amdgcn_workitem_id_z:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T0_Z, VT);

Expand Down
6 changes: 0 additions & 6 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Expand Up @@ -5807,29 +5807,23 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_Z);
case Intrinsic::amdgcn_workgroup_id_x:
case Intrinsic::r600_read_tgid_x:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
case Intrinsic::amdgcn_workgroup_id_y:
case Intrinsic::r600_read_tgid_y:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
case Intrinsic::amdgcn_workgroup_id_z:
case Intrinsic::r600_read_tgid_z:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
case Intrinsic::amdgcn_workitem_id_x:
case Intrinsic::r600_read_tidig_x:
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDX);
case Intrinsic::amdgcn_workitem_id_y:
case Intrinsic::r600_read_tidig_y:
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDY);
case Intrinsic::amdgcn_workitem_id_z:
case Intrinsic::r600_read_tidig_z:
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDZ);
Expand Down
97 changes: 0 additions & 97 deletions llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll
Expand Up @@ -139,95 +139,6 @@ entry:
ret void
}

; Legacy use of r600 intrinsics by GCN

; The tgid values are stored in sgprs offset by the number of user
; sgprs.

; FUNC-LABEL: {{^}}tgid_x_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define amdgpu_kernel void @tgid_x_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.x() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}

; FUNC-LABEL: {{^}}tgid_y_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
define amdgpu_kernel void @tgid_y_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.y() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}

; FUNC-LABEL: {{^}}tgid_z_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define amdgpu_kernel void @tgid_z_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.z() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}

; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 132{{$}}

; FUNC-LABEL: {{^}}tidig_x_legacy:
; GCN-NOHSA: buffer_store_dword v0
define amdgpu_kernel void @tidig_x_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}

; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 2180{{$}}

; FUNC-LABEL: {{^}}tidig_y_legacy:

; GCN-NOHSA: buffer_store_dword v1
define amdgpu_kernel void @tidig_y_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.y() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}

; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 4228{{$}}

; FUNC-LABEL: {{^}}tidig_z_legacy:
; GCN-NOHSA: buffer_store_dword v2
define amdgpu_kernel void @tidig_z_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.z() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}

declare i32 @llvm.r600.read.ngroups.x() #0
declare i32 @llvm.r600.read.ngroups.y() #0
declare i32 @llvm.r600.read.ngroups.z() #0
Expand All @@ -240,12 +151,4 @@ declare i32 @llvm.r600.read.local.size.x() #0
declare i32 @llvm.r600.read.local.size.y() #0
declare i32 @llvm.r600.read.local.size.z() #0

declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
declare i32 @llvm.r600.read.tgid.z() #0

declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.r600.read.tidig.y() #0
declare i32 @llvm.r600.read.tidig.z() #0

attributes #0 = { readnone }
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/and.ll
Expand Up @@ -2,7 +2,7 @@
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.amdgcn.workitem.id.x() #0

; FUNC-LABEL: {{^}}test2:
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
Expand Down Expand Up @@ -96,7 +96,7 @@ define amdgpu_kernel void @s_and_multi_use_constant_i32_1(i32 addrspace(1)* %out
; FUNC-LABEL: {{^}}v_and_i32_vgpr_vgpr:
; SI: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
Expand All @@ -112,7 +112,7 @@ define amdgpu_kernel void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrs
; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
define amdgpu_kernel void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i32 addrspace(1)* %bptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%b = load i32, i32 addrspace(1)* %gep.b
Expand All @@ -126,7 +126,7 @@ define amdgpu_kernel void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i
; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
define amdgpu_kernel void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 %b) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load i32, i32 addrspace(1)* %gep.a
Expand All @@ -138,7 +138,7 @@ define amdgpu_kernel void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrs
; FUNC-LABEL: {{^}}v_and_constant_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}}
define amdgpu_kernel void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%a = load i32, i32 addrspace(1)* %gep, align 4
%and = and i32 %a, 1234567
Expand All @@ -149,7 +149,7 @@ define amdgpu_kernel void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrsp
; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}}
define amdgpu_kernel void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%a = load i32, i32 addrspace(1)* %gep, align 4
%and = and i32 %a, 64
Expand All @@ -160,7 +160,7 @@ define amdgpu_kernel void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 a
; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}}
define amdgpu_kernel void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%a = load i32, i32 addrspace(1)* %gep, align 4
%and = and i32 %a, -16
Expand Down Expand Up @@ -251,7 +251,7 @@ define amdgpu_kernel void @s_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out
; SI: v_and_b32
; SI: v_and_b32
define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.a, align 8
%gep.b = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
Expand All @@ -266,7 +266,7 @@ define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0x11e, {{v[0-9]+}}
; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.a, align 8
%and = and i64 %a, 1231231234567
Expand Down Expand Up @@ -322,7 +322,7 @@ define amdgpu_kernel void @v_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.a, align 8
%and = and i64 %a, 1234567
Expand All @@ -337,7 +337,7 @@ define amdgpu_kernel void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.a, align 8
%and = and i64 %a, 64
Expand All @@ -353,7 +353,7 @@ define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addr
; SI-NOT: and
; SI: buffer_store_dwordx2 v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}
define amdgpu_kernel void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.a, align 8
%and = and i64 %a, -8
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/ctlz.ll
Expand Up @@ -15,7 +15,7 @@ declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone
declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone

declare i32 @llvm.r600.read.tidig.x() nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

define amdgpu_kernel void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
; SI-LABEL: s_ctlz_i32:
Expand Down Expand Up @@ -120,7 +120,7 @@ define amdgpu_kernel void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrsp
; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45)
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
Expand Down Expand Up @@ -195,7 +195,7 @@ define amdgpu_kernel void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2
; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45)
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
%val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 false) nounwind readnone
Expand Down Expand Up @@ -288,7 +288,7 @@ define amdgpu_kernel void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4
; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45)
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
%val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 false) nounwind readnone
Expand Down Expand Up @@ -576,7 +576,7 @@ define amdgpu_kernel void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrsp
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W,
; EG-NEXT: LSHR * T1.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
%val = load i64, i64 addrspace(1)* %in.gep
Expand Down Expand Up @@ -663,7 +663,7 @@ define amdgpu_kernel void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, PV.Z,
; EG-NEXT: LSHR * T1.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%val = load i64, i64 addrspace(1)* %in.gep
Expand Down Expand Up @@ -729,7 +729,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out
; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: -1(nan), 2(2.802597e-45)
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
Expand Down Expand Up @@ -795,7 +795,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out
; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: -1(nan), 2(2.802597e-45)
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
Expand Down Expand Up @@ -872,7 +872,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias
; EG-NEXT: CNDE_INT T0.X, PV.W, T0.W, literal.x,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: -1(nan), 2(2.802597e-45)
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
Expand Down Expand Up @@ -948,7 +948,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; EG-NEXT: CNDE_INT T0.X, PV.W, literal.x, T0.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: -1(nan), 2(2.802597e-45)
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
Expand Down Expand Up @@ -1017,7 +1017,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
%val = load i8, i8 addrspace(1)* %valptr.gep
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
Expand Down Expand Up @@ -1160,7 +1160,7 @@ define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%valptr.gep = getelementptr i7, i7 addrspace(1)* %valptr, i32 %tid
%val = load i7, i7 addrspace(1)* %valptr.gep
%ctlz = call i7 @llvm.ctlz.i7(i7 %val, i1 false) nounwind readnone
Expand Down

0 comments on commit 7af7b96

Please sign in to comment.