diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td index 8d4cad4c07bc7..488b37e27d322 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td @@ -155,7 +155,8 @@ def AMDGPUPostLegalizerCombiner: GICombiner< def AMDGPURegBankCombiner : GICombiner< "AMDGPURegBankCombinerImpl", - [unmerge_merge, unmerge_cst, unmerge_undef, + [unmerge_merge, unmerge_cst, unmerge_undef, trunc_ext_fold, + anyext_trunc_fold, select_constant_cmp, zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain, fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp]> { } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 31d72fb8cadd8..85b5b59886199 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2153,24 +2153,40 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const { assert(Size <= 32 || Size == 64); const MachineOperand &CCOp = I.getOperand(1); Register CCReg = CCOp.getReg(); + + Register TrueVal = I.getOperand(2).getReg(); + Register FalseVal = I.getOperand(3).getReg(); if (!isVCC(CCReg, *MRI)) { unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32; MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) .addReg(CCReg); - // The generic constrainSelectedInstRegOperands doesn't work for the scc register - // bank, because it does not cover the register class that we used to represent - // for it. So we need to manually set the register class here. - if (!MRI->getRegClassOrNull(CCReg)) - MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI)); - MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg) - .add(I.getOperand(2)) - .add(I.getOperand(3)); + bool Ret = constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI); + + // select 1, 0 is just a copy SCC. + if (getIConstantVRegVal(TrueVal, *MRI) == 1 && + getIConstantVRegVal(FalseVal, *MRI) == 0) { + // FIXME: Do we need to have two copies or could we get away with just + // returning CCReg? + MachineInstr *RetCopy = + BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg) + .addReg(AMDGPU::SCC); + Ret |= constrainSelectedInstRegOperands(*RetCopy, TII, TRI, RBI); + } else { + // The generic constrainSelectedInstRegOperands doesn't work for the scc + // register bank, because it does not cover the register class that we + // used to represent for it. So we need to manually set the register + // class here. + if (!MRI->getRegClassOrNull(CCReg)) + MRI->setRegClass(CCReg, + TRI.getConstrainedRegClassForOperand(CCOp, *MRI)); + MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg) + .addReg(TrueVal) + .addReg(FalseVal); + Ret |= constrainSelectedInstRegOperands(*Select, TII, TRI, RBI); + } - bool Ret = false; - Ret |= constrainSelectedInstRegOperands(*Select, TII, TRI, RBI); - Ret |= constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI); I.eraseFromParent(); return Ret; } @@ -2181,11 +2197,11 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const { MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg) - .addImm(0) - .add(I.getOperand(3)) - .addImm(0) - .add(I.getOperand(2)) - .add(I.getOperand(1)); + .addImm(0) + .addReg(FalseVal) + .addImm(0) + .addReg(TrueVal) + .add(I.getOperand(1)); bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI); I.eraseFromParent(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 5b056bd9e5dba..80535363249ef 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2556,16 +2556,44 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case AMDGPU::G_SEXT: case AMDGPU::G_ZEXT: case AMDGPU::G_ANYEXT: { + Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); LLT SrcTy = MRI.getType(SrcReg); const bool Signed = Opc == AMDGPU::G_SEXT; + const LLT S16 = LLT::scalar(16); + const LLT S32 = LLT::scalar(32); + const LLT S64 = LLT::scalar(64); + assert(OpdMapper.getVRegs(1).empty()); const RegisterBank *SrcBank = OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank; - Register DstReg = MI.getOperand(0).getReg(); + LLT SelType = MRI.getType(DstReg); + + // Extending SGPR S1 to S16/32/64. + if (SrcBank == &AMDGPU::SGPRRegBank && + MRI.getType(SrcReg) == LLT::scalar(1)) { + assert(SelType == S32 || SelType == S16 || SelType == S64); + B.setInstrAndDebugLoc(MI); + + Register False = B.buildConstant(SelType, 0).getReg(0); + MRI.setRegBank(False, AMDGPU::SGPRRegBank); + + Register True = B.buildConstant(SelType, Signed ? -1 : 1).getReg(0); + MRI.setRegBank(True, AMDGPU::SGPRRegBank); + + Register SrcExt = B.buildZExt(S32, SrcReg).getReg(0); + MRI.setRegBank(SrcExt, AMDGPU::SGPRRegBank); + + B.buildSelect(DstReg, SrcExt, True, False); + MRI.setRegBank(DstReg, *SrcBank); + + MI.eraseFromParent(); + return; + } + LLT DstTy = MRI.getType(DstReg); if (DstTy.isScalar() && SrcBank != &AMDGPU::SGPRRegBank && @@ -2609,7 +2637,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( SrcBank->getID() == AMDGPU::SGPRRegBankID; // TODO: Should s16 select be legal? - LLT SelType = UseSel64 ? LLT::scalar(64) : LLT::scalar(32); + LLT SelType = UseSel64 ? S64 : S32; auto True = B.buildConstant(SelType, Signed ? -1 : 1); auto False = B.buildConstant(SelType, 0); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll index a1013f3803e78..cdb817a009110 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll @@ -640,7 +640,6 @@ define amdgpu_ps i32 @s_saddo_i32(i32 inreg %a, i32 inreg %b) { ; GFX7-NEXT: s_cmp_lt_i32 s1, 0 ; GFX7-NEXT: s_cselect_b32 s1, 1, 0 ; GFX7-NEXT: s_xor_b32 s0, s1, s0 -; GFX7-NEXT: s_and_b32 s0, s0, 1 ; GFX7-NEXT: s_add_i32 s0, s2, s0 ; GFX7-NEXT: ; return to shader part epilog ; @@ -652,7 +651,6 @@ define amdgpu_ps i32 @s_saddo_i32(i32 inreg %a, i32 inreg %b) { ; GFX8-NEXT: s_cmp_lt_i32 s1, 0 ; GFX8-NEXT: s_cselect_b32 s1, 1, 0 ; GFX8-NEXT: s_xor_b32 s0, s1, s0 -; GFX8-NEXT: s_and_b32 s0, s0, 1 ; GFX8-NEXT: s_add_i32 s0, s2, s0 ; GFX8-NEXT: ; return to shader part epilog ; @@ -664,7 +662,6 @@ define amdgpu_ps i32 @s_saddo_i32(i32 inreg %a, i32 inreg %b) { ; GFX9-NEXT: s_cmp_lt_i32 s1, 0 ; GFX9-NEXT: s_cselect_b32 s1, 1, 0 ; GFX9-NEXT: s_xor_b32 s0, s1, s0 -; GFX9-NEXT: s_and_b32 s0, s0, 1 ; GFX9-NEXT: s_add_i32 s0, s2, s0 ; GFX9-NEXT: ; return to shader part epilog %saddo = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) @@ -749,8 +746,6 @@ define amdgpu_ps <2 x i32> @s_saddo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b ; GFX7-NEXT: s_cselect_b32 s3, 1, 0 ; GFX7-NEXT: s_xor_b32 s0, s2, s0 ; GFX7-NEXT: s_xor_b32 s1, s3, s1 -; GFX7-NEXT: s_and_b32 s0, s0, 1 -; GFX7-NEXT: s_and_b32 s1, s1, 1 ; GFX7-NEXT: s_add_i32 s0, s4, s0 ; GFX7-NEXT: s_add_i32 s1, s5, s1 ; GFX7-NEXT: ; return to shader part epilog @@ -769,8 +764,6 @@ define amdgpu_ps <2 x i32> @s_saddo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b ; GFX8-NEXT: s_cselect_b32 s3, 1, 0 ; GFX8-NEXT: s_xor_b32 s0, s2, s0 ; GFX8-NEXT: s_xor_b32 s1, s3, s1 -; GFX8-NEXT: s_and_b32 s0, s0, 1 -; GFX8-NEXT: s_and_b32 s1, s1, 1 ; GFX8-NEXT: s_add_i32 s0, s4, s0 ; GFX8-NEXT: s_add_i32 s1, s5, s1 ; GFX8-NEXT: ; return to shader part epilog @@ -789,8 +782,6 @@ define amdgpu_ps <2 x i32> @s_saddo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b ; GFX9-NEXT: s_cselect_b32 s3, 1, 0 ; GFX9-NEXT: s_xor_b32 s0, s2, s0 ; GFX9-NEXT: s_xor_b32 s1, s3, s1 -; GFX9-NEXT: s_and_b32 s0, s0, 1 -; GFX9-NEXT: s_and_b32 s1, s1, 1 ; GFX9-NEXT: s_add_i32 s0, s4, s0 ; GFX9-NEXT: s_add_i32 s1, s5, s1 ; GFX9-NEXT: ; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll index 043e69abaeef2..38d49add27df1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll @@ -66,7 +66,6 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x ; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0 ; GCN-NEXT: .LBB2_2: ; %Flow ; GCN-NEXT: s_xor_b32 s2, s4, 1 -; GCN-NEXT: s_and_b32 s2, s2, 1 ; GCN-NEXT: s_cmp_lg_u32 s2, 0 ; GCN-NEXT: s_cbranch_scc1 .LBB2_4 ; GCN-NEXT: ; %bb.3: ; %.zero diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll index da9601a8998c2..138eb26063f67 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll @@ -36,7 +36,6 @@ define amdgpu_kernel void @localize_constants(i1 %cond) { ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: .LBB0_2: ; %Flow ; GFX9-NEXT: s_xor_b32 s0, s0, 1 -; GFX9-NEXT: s_and_b32 s0, s0, 1 ; GFX9-NEXT: s_cmp_lg_u32 s0, 0 ; GFX9-NEXT: s_cbranch_scc1 .LBB0_4 ; GFX9-NEXT: ; %bb.3: ; %bb0 @@ -121,7 +120,6 @@ define amdgpu_kernel void @localize_globals(i1 %cond) { ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: .LBB1_2: ; %Flow ; GFX9-NEXT: s_xor_b32 s0, s0, 1 -; GFX9-NEXT: s_and_b32 s0, s0, 1 ; GFX9-NEXT: s_cmp_lg_u32 s0, 0 ; GFX9-NEXT: s_cbranch_scc1 .LBB1_4 ; GFX9-NEXT: ; %bb.3: ; %bb0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll index eb3f74be71de0..cef9e86c49e51 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll @@ -860,7 +860,7 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) { ; GFX7-NEXT: v_mov_b32_e32 v1, s12 ; GFX7-NEXT: v_mul_hi_u32 v3, s16, v1 ; GFX7-NEXT: s_mul_i32 s18, s1, s8 -; GFX7-NEXT: s_cselect_b32 s25, 1, 0 +; GFX7-NEXT: s_cselect_b32 s26, 1, 0 ; GFX7-NEXT: s_add_u32 s18, s18, s17 ; GFX7-NEXT: s_addc_u32 s17, s23, s22 ; GFX7-NEXT: v_mov_b32_e32 v4, s11 @@ -871,33 +871,33 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) { ; GFX7-NEXT: s_mul_i32 s24, s1, s11 ; GFX7-NEXT: v_readfirstlane_b32 s28, v3 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: v_readfirstlane_b32 s27, v5 +; GFX7-NEXT: v_readfirstlane_b32 s25, v5 ; GFX7-NEXT: v_mul_hi_u32 v5, v3, s9 -; GFX7-NEXT: s_cselect_b32 s26, 1, 0 +; GFX7-NEXT: s_cselect_b32 s27, 1, 0 ; GFX7-NEXT: s_add_u32 s24, s24, s22 -; GFX7-NEXT: s_addc_u32 s23, s27, s23 +; GFX7-NEXT: s_addc_u32 s23, s25, s23 ; GFX7-NEXT: v_readfirstlane_b32 s29, v5 ; GFX7-NEXT: v_mov_b32_e32 v5, s4 ; GFX7-NEXT: v_mul_hi_u32 v6, v5, s8 -; GFX7-NEXT: s_mul_i32 s27, s2, s10 +; GFX7-NEXT: s_mul_i32 s25, s2, s10 ; GFX7-NEXT: s_cselect_b32 s22, 1, 0 -; GFX7-NEXT: s_add_u32 s24, s27, s24 +; GFX7-NEXT: s_add_u32 s24, s25, s24 ; GFX7-NEXT: v_mul_hi_u32 v0, v0, s10 -; GFX7-NEXT: s_addc_u32 s27, s28, s23 +; GFX7-NEXT: s_addc_u32 s25, s28, s23 ; GFX7-NEXT: s_mul_i32 s28, s3, s9 ; GFX7-NEXT: s_cselect_b32 s23, 1, 0 ; GFX7-NEXT: s_add_u32 s28, s28, s24 ; GFX7-NEXT: v_readfirstlane_b32 s30, v6 ; GFX7-NEXT: v_mul_hi_u32 v6, s16, v4 -; GFX7-NEXT: s_addc_u32 s27, s29, s27 +; GFX7-NEXT: s_addc_u32 s25, s29, s25 ; GFX7-NEXT: s_mul_i32 s29, s4, s8 ; GFX7-NEXT: s_cselect_b32 s24, 1, 0 ; GFX7-NEXT: s_add_u32 s28, s29, s28 ; GFX7-NEXT: v_readfirstlane_b32 s33, v0 ; GFX7-NEXT: v_mul_hi_u32 v0, v2, s9 -; GFX7-NEXT: s_addc_u32 s27, s30, s27 +; GFX7-NEXT: s_addc_u32 s29, s30, s25 ; GFX7-NEXT: s_mul_i32 s30, s16, s11 -; GFX7-NEXT: s_cselect_b32 s29, 1, 0 +; GFX7-NEXT: s_cselect_b32 s25, 1, 0 ; GFX7-NEXT: v_readfirstlane_b32 s31, v6 ; GFX7-NEXT: s_add_u32 s19, s30, s19 ; GFX7-NEXT: s_addc_u32 s28, s31, s28 @@ -919,84 +919,84 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) { ; GFX7-NEXT: s_addc_u32 s28, s35, s28 ; GFX7-NEXT: v_mul_hi_u32 v0, s16, v0 ; GFX7-NEXT: s_cselect_b32 s34, 1, 0 -; GFX7-NEXT: s_cmp_lg_u32 s26, 0 -; GFX7-NEXT: s_addc_u32 s19, s25, s19 +; GFX7-NEXT: s_cmp_lg_u32 s27, 0 +; GFX7-NEXT: s_addc_u32 s19, s26, s19 ; GFX7-NEXT: v_mov_b32_e32 v2, s13 -; GFX7-NEXT: s_cselect_b32 s25, 1, 0 +; GFX7-NEXT: s_cselect_b32 s26, 1, 0 ; GFX7-NEXT: s_cmp_lg_u32 s21, 0 ; GFX7-NEXT: v_mul_hi_u32 v6, s1, v2 ; GFX7-NEXT: s_addc_u32 s20, s20, 0 -; GFX7-NEXT: v_readfirstlane_b32 s26, v0 +; GFX7-NEXT: v_readfirstlane_b32 s27, v0 ; GFX7-NEXT: v_mul_hi_u32 v0, s2, v1 -; GFX7-NEXT: s_cmp_lg_u32 s25, 0 +; GFX7-NEXT: s_cmp_lg_u32 s26, 0 ; GFX7-NEXT: s_addc_u32 s20, s20, s28 -; GFX7-NEXT: s_mul_i32 s25, s16, s14 +; GFX7-NEXT: s_mul_i32 s26, s16, s14 ; GFX7-NEXT: s_mul_i32 s28, s1, s13 ; GFX7-NEXT: s_cselect_b32 s21, 1, 0 ; GFX7-NEXT: v_readfirstlane_b32 s35, v6 -; GFX7-NEXT: s_add_u32 s25, s28, s25 -; GFX7-NEXT: s_addc_u32 s26, s35, s26 +; GFX7-NEXT: s_add_u32 s26, s28, s26 +; GFX7-NEXT: s_addc_u32 s27, s35, s27 ; GFX7-NEXT: v_readfirstlane_b32 s35, v0 ; GFX7-NEXT: v_mul_hi_u32 v0, v3, s11 ; GFX7-NEXT: s_mul_i32 s28, s2, s12 -; GFX7-NEXT: s_add_u32 s25, s28, s25 -; GFX7-NEXT: s_addc_u32 s26, s35, s26 +; GFX7-NEXT: s_add_u32 s26, s28, s26 +; GFX7-NEXT: s_addc_u32 s27, s35, s27 ; GFX7-NEXT: v_readfirstlane_b32 s35, v0 ; GFX7-NEXT: v_mul_hi_u32 v0, v5, s10 ; GFX7-NEXT: s_mul_i32 s28, s3, s11 -; GFX7-NEXT: s_add_u32 s25, s28, s25 -; GFX7-NEXT: s_addc_u32 s26, s35, s26 +; GFX7-NEXT: s_add_u32 s26, s28, s26 +; GFX7-NEXT: s_addc_u32 s27, s35, s27 ; GFX7-NEXT: v_readfirstlane_b32 s35, v0 ; GFX7-NEXT: v_mov_b32_e32 v0, s5 ; GFX7-NEXT: v_mul_hi_u32 v6, v0, s9 ; GFX7-NEXT: s_mul_i32 s28, s4, s10 -; GFX7-NEXT: s_add_u32 s25, s28, s25 +; GFX7-NEXT: s_add_u32 s26, s28, s26 ; GFX7-NEXT: v_mul_hi_u32 v1, s1, v1 -; GFX7-NEXT: s_addc_u32 s26, s35, s26 +; GFX7-NEXT: s_addc_u32 s27, s35, s27 ; GFX7-NEXT: v_readfirstlane_b32 s35, v6 ; GFX7-NEXT: v_mov_b32_e32 v6, s6 ; GFX7-NEXT: v_mul_hi_u32 v6, v6, s8 ; GFX7-NEXT: s_mul_i32 s28, s5, s9 -; GFX7-NEXT: s_add_u32 s25, s28, s25 +; GFX7-NEXT: s_add_u32 s26, s28, s26 ; GFX7-NEXT: v_mul_hi_u32 v2, s16, v2 ; GFX7-NEXT: v_readfirstlane_b32 s36, v1 ; GFX7-NEXT: v_mul_hi_u32 v1, s2, v4 -; GFX7-NEXT: s_addc_u32 s26, s35, s26 +; GFX7-NEXT: s_addc_u32 s27, s35, s27 ; GFX7-NEXT: s_mul_i32 s28, s6, s8 ; GFX7-NEXT: v_readfirstlane_b32 s35, v6 -; GFX7-NEXT: s_add_u32 s25, s28, s25 -; GFX7-NEXT: s_addc_u32 s26, s35, s26 +; GFX7-NEXT: s_add_u32 s26, s28, s26 +; GFX7-NEXT: s_addc_u32 s27, s35, s27 ; GFX7-NEXT: s_mul_i32 s28, s16, s13 ; GFX7-NEXT: v_readfirstlane_b32 s35, v2 -; GFX7-NEXT: s_add_u32 s27, s28, s27 +; GFX7-NEXT: s_add_u32 s28, s28, s29 ; GFX7-NEXT: v_readfirstlane_b32 s37, v1 ; GFX7-NEXT: v_mul_hi_u32 v1, v3, s10 -; GFX7-NEXT: s_addc_u32 s25, s35, s25 +; GFX7-NEXT: s_addc_u32 s26, s35, s26 ; GFX7-NEXT: s_mul_i32 s35, s1, s12 -; GFX7-NEXT: s_cselect_b32 s28, 1, 0 -; GFX7-NEXT: s_add_u32 s27, s35, s27 -; GFX7-NEXT: s_addc_u32 s25, s36, s25 +; GFX7-NEXT: s_cselect_b32 s29, 1, 0 +; GFX7-NEXT: s_add_u32 s28, s35, s28 +; GFX7-NEXT: s_addc_u32 s26, s36, s26 ; GFX7-NEXT: s_mul_i32 s36, s2, s11 ; GFX7-NEXT: s_cselect_b32 s35, 1, 0 -; GFX7-NEXT: s_add_u32 s27, s36, s27 +; GFX7-NEXT: s_add_u32 s28, s36, s28 ; GFX7-NEXT: v_readfirstlane_b32 s38, v1 ; GFX7-NEXT: v_mul_hi_u32 v1, v5, s9 -; GFX7-NEXT: s_addc_u32 s25, s37, s25 +; GFX7-NEXT: s_addc_u32 s26, s37, s26 ; GFX7-NEXT: s_mul_i32 s37, s3, s10 ; GFX7-NEXT: s_cselect_b32 s36, 1, 0 -; GFX7-NEXT: s_add_u32 s27, s37, s27 +; GFX7-NEXT: s_add_u32 s28, s37, s28 ; GFX7-NEXT: v_mul_hi_u32 v0, v0, s8 -; GFX7-NEXT: s_addc_u32 s25, s38, s25 +; GFX7-NEXT: s_addc_u32 s26, s38, s26 ; GFX7-NEXT: s_mul_i32 s38, s4, s9 ; GFX7-NEXT: s_cselect_b32 s37, 1, 0 ; GFX7-NEXT: v_readfirstlane_b32 s39, v1 -; GFX7-NEXT: s_add_u32 s27, s38, s27 -; GFX7-NEXT: s_addc_u32 s25, s39, s25 +; GFX7-NEXT: s_add_u32 s28, s38, s28 +; GFX7-NEXT: s_addc_u32 s26, s39, s26 ; GFX7-NEXT: s_mul_i32 s39, s5, s8 ; GFX7-NEXT: s_cselect_b32 s38, 1, 0 ; GFX7-NEXT: v_readfirstlane_b32 s40, v0 -; GFX7-NEXT: s_add_u32 s27, s39, s27 -; GFX7-NEXT: s_addc_u32 s25, s40, s25 +; GFX7-NEXT: s_add_u32 s28, s39, s28 +; GFX7-NEXT: s_addc_u32 s26, s40, s26 ; GFX7-NEXT: s_cselect_b32 s39, 1, 0 ; GFX7-NEXT: s_cmp_lg_u32 s31, 0 ; GFX7-NEXT: s_addc_u32 s30, s30, 0 @@ -1005,18 +1005,18 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) { ; GFX7-NEXT: s_cmp_lg_u32 s34, 0 ; GFX7-NEXT: s_addc_u32 s30, s30, 0 ; GFX7-NEXT: s_cmp_lg_u32 s21, 0 -; GFX7-NEXT: s_addc_u32 s21, s30, s27 -; GFX7-NEXT: s_cselect_b32 s27, 1, 0 +; GFX7-NEXT: s_addc_u32 s21, s30, s28 +; GFX7-NEXT: s_cselect_b32 s28, 1, 0 ; GFX7-NEXT: s_cmp_lg_u32 s23, 0 ; GFX7-NEXT: s_addc_u32 s22, s22, 0 ; GFX7-NEXT: s_cmp_lg_u32 s24, 0 ; GFX7-NEXT: s_addc_u32 s22, s22, 0 -; GFX7-NEXT: s_cmp_lg_u32 s29, 0 +; GFX7-NEXT: s_cmp_lg_u32 s25, 0 ; GFX7-NEXT: s_addc_u32 s22, s22, 0 -; GFX7-NEXT: s_cmp_lg_u32 s27, 0 -; GFX7-NEXT: s_addc_u32 s22, s22, s25 +; GFX7-NEXT: s_cmp_lg_u32 s28, 0 +; GFX7-NEXT: s_addc_u32 s22, s22, s26 ; GFX7-NEXT: s_mul_i32 s16, s16, s15 -; GFX7-NEXT: s_addc_u32 s15, s26, s16 +; GFX7-NEXT: s_addc_u32 s15, s27, s16 ; GFX7-NEXT: s_mul_i32 s1, s1, s14 ; GFX7-NEXT: s_cmp_lg_u32 s39, 0 ; GFX7-NEXT: s_addc_u32 s1, s15, s1 @@ -1033,7 +1033,7 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) { ; GFX7-NEXT: s_cmp_lg_u32 s35, 0 ; GFX7-NEXT: s_addc_u32 s1, s1, s5 ; GFX7-NEXT: s_mul_i32 s6, s6, s9 -; GFX7-NEXT: s_cmp_lg_u32 s28, 0 +; GFX7-NEXT: s_cmp_lg_u32 s29, 0 ; GFX7-NEXT: s_addc_u32 s1, s1, s6 ; GFX7-NEXT: s_mul_i32 s7, s7, s8 ; GFX7-NEXT: s_mul_i32 s0, s0, s8 @@ -1081,7 +1081,7 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) { ; GFX8-NEXT: v_mov_b32_e32 v1, s12 ; GFX8-NEXT: v_mul_hi_u32 v3, s16, v1 ; GFX8-NEXT: s_mul_i32 s18, s1, s8 -; GFX8-NEXT: s_cselect_b32 s25, 1, 0 +; GFX8-NEXT: s_cselect_b32 s26, 1, 0 ; GFX8-NEXT: s_add_u32 s18, s18, s17 ; GFX8-NEXT: s_addc_u32 s17, s23, s22 ; GFX8-NEXT: v_mov_b32_e32 v4, s11 @@ -1092,33 +1092,33 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) { ; GFX8-NEXT: s_mul_i32 s24, s1, s11 ; GFX8-NEXT: v_readfirstlane_b32 s28, v3 ; GFX8-NEXT: v_mov_b32_e32 v3, s3 -; GFX8-NEXT: v_readfirstlane_b32 s27, v5 +; GFX8-NEXT: v_readfirstlane_b32 s25, v5 ; GFX8-NEXT: v_mul_hi_u32 v5, v3, s9 -; GFX8-NEXT: s_cselect_b32 s26, 1, 0 +; GFX8-NEXT: s_cselect_b32 s27, 1, 0 ; GFX8-NEXT: s_add_u32 s24, s24, s22 -; GFX8-NEXT: s_addc_u32 s23, s27, s23 +; GFX8-NEXT: s_addc_u32 s23, s25, s23 ; GFX8-NEXT: v_readfirstlane_b32 s29, v5 ; GFX8-NEXT: v_mov_b32_e32 v5, s4 ; GFX8-NEXT: v_mul_hi_u32 v6, v5, s8 -; GFX8-NEXT: s_mul_i32 s27, s2, s10 +; GFX8-NEXT: s_mul_i32 s25, s2, s10 ; GFX8-NEXT: s_cselect_b32 s22, 1, 0 -; GFX8-NEXT: s_add_u32 s24, s27, s24 +; GFX8-NEXT: s_add_u32 s24, s25, s24 ; GFX8-NEXT: v_mul_hi_u32 v0, v0, s10 -; GFX8-NEXT: s_addc_u32 s27, s28, s23 +; GFX8-NEXT: s_addc_u32 s25, s28, s23 ; GFX8-NEXT: s_mul_i32 s28, s3, s9 ; GFX8-NEXT: s_cselect_b32 s23, 1, 0 ; GFX8-NEXT: s_add_u32 s28, s28, s24 ; GFX8-NEXT: v_readfirstlane_b32 s30, v6 ; GFX8-NEXT: v_mul_hi_u32 v6, s16, v4 -; GFX8-NEXT: s_addc_u32 s27, s29, s27 +; GFX8-NEXT: s_addc_u32 s25, s29, s25 ; GFX8-NEXT: s_mul_i32 s29, s4, s8 ; GFX8-NEXT: s_cselect_b32 s24, 1, 0 ; GFX8-NEXT: s_add_u32 s28, s29, s28 ; GFX8-NEXT: v_readfirstlane_b32 s33, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v2, s9 -; GFX8-NEXT: s_addc_u32 s27, s30, s27 +; GFX8-NEXT: s_addc_u32 s29, s30, s25 ; GFX8-NEXT: s_mul_i32 s30, s16, s11 -; GFX8-NEXT: s_cselect_b32 s29, 1, 0 +; GFX8-NEXT: s_cselect_b32 s25, 1, 0 ; GFX8-NEXT: v_readfirstlane_b32 s31, v6 ; GFX8-NEXT: s_add_u32 s19, s30, s19 ; GFX8-NEXT: s_addc_u32 s28, s31, s28 @@ -1140,84 +1140,84 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) { ; GFX8-NEXT: s_addc_u32 s28, s35, s28 ; GFX8-NEXT: v_mul_hi_u32 v0, s16, v0 ; GFX8-NEXT: s_cselect_b32 s34, 1, 0 -; GFX8-NEXT: s_cmp_lg_u32 s26, 0 -; GFX8-NEXT: s_addc_u32 s19, s25, s19 +; GFX8-NEXT: s_cmp_lg_u32 s27, 0 +; GFX8-NEXT: s_addc_u32 s19, s26, s19 ; GFX8-NEXT: v_mov_b32_e32 v2, s13 -; GFX8-NEXT: s_cselect_b32 s25, 1, 0 +; GFX8-NEXT: s_cselect_b32 s26, 1, 0 ; GFX8-NEXT: s_cmp_lg_u32 s21, 0 ; GFX8-NEXT: v_mul_hi_u32 v6, s1, v2 ; GFX8-NEXT: s_addc_u32 s20, s20, 0 -; GFX8-NEXT: v_readfirstlane_b32 s26, v0 +; GFX8-NEXT: v_readfirstlane_b32 s27, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, s2, v1 -; GFX8-NEXT: s_cmp_lg_u32 s25, 0 +; GFX8-NEXT: s_cmp_lg_u32 s26, 0 ; GFX8-NEXT: s_addc_u32 s20, s20, s28 -; GFX8-NEXT: s_mul_i32 s25, s16, s14 +; GFX8-NEXT: s_mul_i32 s26, s16, s14 ; GFX8-NEXT: s_mul_i32 s28, s1, s13 ; GFX8-NEXT: s_cselect_b32 s21, 1, 0 ; GFX8-NEXT: v_readfirstlane_b32 s35, v6 -; GFX8-NEXT: s_add_u32 s25, s28, s25 -; GFX8-NEXT: s_addc_u32 s26, s35, s26 +; GFX8-NEXT: s_add_u32 s26, s28, s26 +; GFX8-NEXT: s_addc_u32 s27, s35, s27 ; GFX8-NEXT: v_readfirstlane_b32 s35, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v3, s11 ; GFX8-NEXT: s_mul_i32 s28, s2, s12 -; GFX8-NEXT: s_add_u32 s25, s28, s25 -; GFX8-NEXT: s_addc_u32 s26, s35, s26 +; GFX8-NEXT: s_add_u32 s26, s28, s26 +; GFX8-NEXT: s_addc_u32 s27, s35, s27 ; GFX8-NEXT: v_readfirstlane_b32 s35, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, v5, s10 ; GFX8-NEXT: s_mul_i32 s28, s3, s11 -; GFX8-NEXT: s_add_u32 s25, s28, s25 -; GFX8-NEXT: s_addc_u32 s26, s35, s26 +; GFX8-NEXT: s_add_u32 s26, s28, s26 +; GFX8-NEXT: s_addc_u32 s27, s35, s27 ; GFX8-NEXT: v_readfirstlane_b32 s35, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s5 ; GFX8-NEXT: v_mul_hi_u32 v6, v0, s9 ; GFX8-NEXT: s_mul_i32 s28, s4, s10 -; GFX8-NEXT: s_add_u32 s25, s28, s25 +; GFX8-NEXT: s_add_u32 s26, s28, s26 ; GFX8-NEXT: v_mul_hi_u32 v1, s1, v1 -; GFX8-NEXT: s_addc_u32 s26, s35, s26 +; GFX8-NEXT: s_addc_u32 s27, s35, s27 ; GFX8-NEXT: v_readfirstlane_b32 s35, v6 ; GFX8-NEXT: v_mov_b32_e32 v6, s6 ; GFX8-NEXT: v_mul_hi_u32 v6, v6, s8 ; GFX8-NEXT: s_mul_i32 s28, s5, s9 -; GFX8-NEXT: s_add_u32 s25, s28, s25 +; GFX8-NEXT: s_add_u32 s26, s28, s26 ; GFX8-NEXT: v_mul_hi_u32 v2, s16, v2 ; GFX8-NEXT: v_readfirstlane_b32 s36, v1 ; GFX8-NEXT: v_mul_hi_u32 v1, s2, v4 -; GFX8-NEXT: s_addc_u32 s26, s35, s26 +; GFX8-NEXT: s_addc_u32 s27, s35, s27 ; GFX8-NEXT: s_mul_i32 s28, s6, s8 ; GFX8-NEXT: v_readfirstlane_b32 s35, v6 -; GFX8-NEXT: s_add_u32 s25, s28, s25 -; GFX8-NEXT: s_addc_u32 s26, s35, s26 +; GFX8-NEXT: s_add_u32 s26, s28, s26 +; GFX8-NEXT: s_addc_u32 s27, s35, s27 ; GFX8-NEXT: s_mul_i32 s28, s16, s13 ; GFX8-NEXT: v_readfirstlane_b32 s35, v2 -; GFX8-NEXT: s_add_u32 s27, s28, s27 +; GFX8-NEXT: s_add_u32 s28, s28, s29 ; GFX8-NEXT: v_readfirstlane_b32 s37, v1 ; GFX8-NEXT: v_mul_hi_u32 v1, v3, s10 -; GFX8-NEXT: s_addc_u32 s25, s35, s25 +; GFX8-NEXT: s_addc_u32 s26, s35, s26 ; GFX8-NEXT: s_mul_i32 s35, s1, s12 -; GFX8-NEXT: s_cselect_b32 s28, 1, 0 -; GFX8-NEXT: s_add_u32 s27, s35, s27 -; GFX8-NEXT: s_addc_u32 s25, s36, s25 +; GFX8-NEXT: s_cselect_b32 s29, 1, 0 +; GFX8-NEXT: s_add_u32 s28, s35, s28 +; GFX8-NEXT: s_addc_u32 s26, s36, s26 ; GFX8-NEXT: s_mul_i32 s36, s2, s11 ; GFX8-NEXT: s_cselect_b32 s35, 1, 0 -; GFX8-NEXT: s_add_u32 s27, s36, s27 +; GFX8-NEXT: s_add_u32 s28, s36, s28 ; GFX8-NEXT: v_readfirstlane_b32 s38, v1 ; GFX8-NEXT: v_mul_hi_u32 v1, v5, s9 -; GFX8-NEXT: s_addc_u32 s25, s37, s25 +; GFX8-NEXT: s_addc_u32 s26, s37, s26 ; GFX8-NEXT: s_mul_i32 s37, s3, s10 ; GFX8-NEXT: s_cselect_b32 s36, 1, 0 -; GFX8-NEXT: s_add_u32 s27, s37, s27 +; GFX8-NEXT: s_add_u32 s28, s37, s28 ; GFX8-NEXT: v_mul_hi_u32 v0, v0, s8 -; GFX8-NEXT: s_addc_u32 s25, s38, s25 +; GFX8-NEXT: s_addc_u32 s26, s38, s26 ; GFX8-NEXT: s_mul_i32 s38, s4, s9 ; GFX8-NEXT: s_cselect_b32 s37, 1, 0 ; GFX8-NEXT: v_readfirstlane_b32 s39, v1 -; GFX8-NEXT: s_add_u32 s27, s38, s27 -; GFX8-NEXT: s_addc_u32 s25, s39, s25 +; GFX8-NEXT: s_add_u32 s28, s38, s28 +; GFX8-NEXT: s_addc_u32 s26, s39, s26 ; GFX8-NEXT: s_mul_i32 s39, s5, s8 ; GFX8-NEXT: s_cselect_b32 s38, 1, 0 ; GFX8-NEXT: v_readfirstlane_b32 s40, v0 -; GFX8-NEXT: s_add_u32 s27, s39, s27 -; GFX8-NEXT: s_addc_u32 s25, s40, s25 +; GFX8-NEXT: s_add_u32 s28, s39, s28 +; GFX8-NEXT: s_addc_u32 s26, s40, s26 ; GFX8-NEXT: s_cselect_b32 s39, 1, 0 ; GFX8-NEXT: s_cmp_lg_u32 s31, 0 ; GFX8-NEXT: s_addc_u32 s30, s30, 0 @@ -1226,18 +1226,18 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) { ; GFX8-NEXT: s_cmp_lg_u32 s34, 0 ; GFX8-NEXT: s_addc_u32 s30, s30, 0 ; GFX8-NEXT: s_cmp_lg_u32 s21, 0 -; GFX8-NEXT: s_addc_u32 s21, s30, s27 -; GFX8-NEXT: s_cselect_b32 s27, 1, 0 +; GFX8-NEXT: s_addc_u32 s21, s30, s28 +; GFX8-NEXT: s_cselect_b32 s28, 1, 0 ; GFX8-NEXT: s_cmp_lg_u32 s23, 0 ; GFX8-NEXT: s_addc_u32 s22, s22, 0 ; GFX8-NEXT: s_cmp_lg_u32 s24, 0 ; GFX8-NEXT: s_addc_u32 s22, s22, 0 -; GFX8-NEXT: s_cmp_lg_u32 s29, 0 +; GFX8-NEXT: s_cmp_lg_u32 s25, 0 ; GFX8-NEXT: s_addc_u32 s22, s22, 0 -; GFX8-NEXT: s_cmp_lg_u32 s27, 0 -; GFX8-NEXT: s_addc_u32 s22, s22, s25 +; GFX8-NEXT: s_cmp_lg_u32 s28, 0 +; GFX8-NEXT: s_addc_u32 s22, s22, s26 ; GFX8-NEXT: s_mul_i32 s16, s16, s15 -; GFX8-NEXT: s_addc_u32 s15, s26, s16 +; GFX8-NEXT: s_addc_u32 s15, s27, s16 ; GFX8-NEXT: s_mul_i32 s1, s1, s14 ; GFX8-NEXT: s_cmp_lg_u32 s39, 0 ; GFX8-NEXT: s_addc_u32 s1, s15, s1 @@ -1254,7 +1254,7 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) { ; GFX8-NEXT: s_cmp_lg_u32 s35, 0 ; GFX8-NEXT: s_addc_u32 s1, s1, s5 ; GFX8-NEXT: s_mul_i32 s6, s6, s9 -; GFX8-NEXT: s_cmp_lg_u32 s28, 0 +; GFX8-NEXT: s_cmp_lg_u32 s29, 0 ; GFX8-NEXT: s_addc_u32 s1, s1, s6 ; GFX8-NEXT: s_mul_i32 s7, s7, s8 ; GFX8-NEXT: s_mul_i32 s0, s0, s8 @@ -1286,15 +1286,15 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) { ; GFX9-NEXT: s_cselect_b32 s21, 1, 0 ; GFX9-NEXT: s_mul_hi_u32 s23, s16, s9 ; GFX9-NEXT: s_add_u32 s17, s22, s17 -; GFX9-NEXT: s_addc_u32 s18, s23, s18 -; GFX9-NEXT: s_mul_i32 s23, s1, s8 -; GFX9-NEXT: s_cselect_b32 s22, 1, 0 +; GFX9-NEXT: s_addc_u32 s22, s23, s18 +; GFX9-NEXT: s_mul_i32 s18, s1, s8 +; GFX9-NEXT: s_cselect_b32 s23, 1, 0 ; GFX9-NEXT: s_mul_hi_u32 s24, s1, s8 -; GFX9-NEXT: s_add_u32 s17, s23, s17 -; GFX9-NEXT: s_addc_u32 s18, s24, s18 +; GFX9-NEXT: s_add_u32 s18, s18, s17 +; GFX9-NEXT: s_addc_u32 s17, s24, s22 ; GFX9-NEXT: s_mul_i32 s24, s16, s12 ; GFX9-NEXT: s_mul_i32 s26, s1, s11 -; GFX9-NEXT: s_cselect_b32 s23, 1, 0 +; GFX9-NEXT: s_cselect_b32 s22, 1, 0 ; GFX9-NEXT: s_mul_hi_u32 s25, s16, s12 ; GFX9-NEXT: s_mul_hi_u32 s27, s1, s11 ; GFX9-NEXT: s_add_u32 s24, s26, s24 @@ -1335,8 +1335,8 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) { ; GFX9-NEXT: s_add_u32 s19, s34, s19 ; GFX9-NEXT: s_addc_u32 s24, s35, s24 ; GFX9-NEXT: s_cselect_b32 s34, 1, 0 -; GFX9-NEXT: s_cmp_lg_u32 s23, 0 -; GFX9-NEXT: s_addc_u32 s19, s22, s19 +; GFX9-NEXT: s_cmp_lg_u32 s22, 0 +; GFX9-NEXT: s_addc_u32 s19, s23, s19 ; GFX9-NEXT: s_cselect_b32 s22, 1, 0 ; GFX9-NEXT: s_cmp_lg_u32 s21, 0 ; GFX9-NEXT: s_addc_u32 s20, s20, 0 @@ -1439,8 +1439,8 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) { ; GFX9-NEXT: s_mul_i32 s7, s7, s8 ; GFX9-NEXT: s_mul_i32 s0, s0, s8 ; GFX9-NEXT: s_add_u32 s7, s7, s1 -; GFX9-NEXT: s_mov_b32 s1, s17 -; GFX9-NEXT: s_mov_b32 s2, s18 +; GFX9-NEXT: s_mov_b32 s1, s18 +; GFX9-NEXT: s_mov_b32 s2, s17 ; GFX9-NEXT: s_mov_b32 s3, s19 ; GFX9-NEXT: s_mov_b32 s4, s20 ; GFX9-NEXT: s_mov_b32 s5, s21 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir index b76fbdd9bed15..e5677b0d1c329 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir @@ -50,7 +50,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s16) = G_ANYEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s16) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s16) = G_SELECT [[ZEXT]](s16), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 @@ -71,7 +74,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 @@ -92,7 +98,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s64) = G_SELECT [[ZEXT]](s32), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 @@ -180,7 +189,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s16) = G_ANYEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s16) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s16) = G_SELECT [[ZEXT]](s16), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0 %2:_(s16) = G_ANYEXT %1 @@ -198,7 +210,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0 %2:_(s32) = G_ANYEXT %1 @@ -216,7 +231,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s64) = G_SELECT [[ZEXT]](s32), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0 %2:_(s64) = G_ANYEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-freeze.mir index d87bc1f01bdb8..d210e9e8c1ffd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-freeze.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-freeze.mir @@ -62,8 +62,11 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:sgpr(s1) = G_FREEZE [[TRUNC]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[FREEZE]](s1) - ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[FREEZE]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[C1]], [[C]] + ; CHECK-NEXT: $sgpr0 = COPY [[SELECT]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0(s32) %2:_(s1) = G_FREEZE %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir index 9e56cb85bf409..1a32eb4e78f10 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir @@ -69,7 +69,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s16) = G_SEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 -1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s16) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s16) = G_SELECT [[ZEXT]](s16), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 @@ -90,7 +93,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 @@ -111,7 +117,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s64) = G_SELECT [[ZEXT]](s32), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 @@ -199,7 +208,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s16) = G_SEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 -1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s16) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s16) = G_SELECT [[ZEXT]](s16), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0 %2:_(s16) = G_SEXT %1 @@ -217,7 +229,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0 %2:_(s32) = G_SEXT %1 @@ -235,7 +250,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s64) = G_SELECT [[ZEXT]](s32), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0 %2:_(s64) = G_SEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir index 8756061d89ca2..9a67afde9affc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir @@ -68,7 +68,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 1 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s16) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s16) = G_SELECT [[ZEXT]](s16), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 @@ -89,7 +92,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 @@ -110,7 +116,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s64) = G_SELECT [[ZEXT]](s32), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 @@ -198,7 +207,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 1 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s16) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s16) = G_SELECT [[ZEXT]](s16), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0 %2:_(s16) = G_ZEXT %1 @@ -216,7 +228,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0 %2:_(s32) = G_ZEXT %1 @@ -234,7 +249,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s64) = G_SELECT [[ZEXT]](s32), [[C1]], [[C]] %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0 %2:_(s64) = G_ZEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll index cded5c94edf8c..7bed94ff8ff4d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -1081,7 +1081,6 @@ define amdgpu_ps i24 @s_saddsat_i24(i24 inreg %lhs, i24 inreg %rhs) { ; GFX8-NEXT: s_xor_b32 s0, s1, s0 ; GFX8-NEXT: s_ashr_i32 s1, s3, 23 ; GFX8-NEXT: s_add_i32 s1, s1, 0xff800000 -; GFX8-NEXT: s_and_b32 s0, s0, 1 ; GFX8-NEXT: s_cmp_lg_u32 s0, 0 ; GFX8-NEXT: s_cselect_b32 s0, s1, s2 ; GFX8-NEXT: ; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll index 4248f7b6a1583..bef24b766b4c3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -335,7 +335,6 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: .LBB1_3: ; %Flow ; CHECK-NEXT: s_xor_b32 s0, s0, 1 -; CHECK-NEXT: s_and_b32 s0, s0, 1 ; CHECK-NEXT: s_cmp_lg_u32 s0, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB1_5 ; CHECK-NEXT: ; %bb.4: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll index d0c55c69f5087..174963c3d6e56 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -327,7 +327,6 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: .LBB1_3: ; %Flow ; CHECK-NEXT: s_xor_b32 s0, s0, 1 -; CHECK-NEXT: s_and_b32 s0, s0, 1 ; CHECK-NEXT: s_cmp_lg_u32 s0, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB1_5 ; CHECK-NEXT: ; %bb.4: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll index 65455d754be4f..0177f00fbb64b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -1081,7 +1081,6 @@ define amdgpu_ps i24 @s_ssubsat_i24(i24 inreg %lhs, i24 inreg %rhs) { ; GFX8-NEXT: s_xor_b32 s0, s1, s0 ; GFX8-NEXT: s_ashr_i32 s1, s3, 23 ; GFX8-NEXT: s_add_i32 s1, s1, 0xff800000 -; GFX8-NEXT: s_and_b32 s0, s0, 1 ; GFX8-NEXT: s_cmp_lg_u32 s0, 0 ; GFX8-NEXT: s_cselect_b32 s0, s1, s2 ; GFX8-NEXT: ; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll index f8e7e5ecd6260..e05531dac57ae 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll @@ -640,7 +640,6 @@ define amdgpu_ps i32 @s_ssubo_i32(i32 inreg %a, i32 inreg %b) { ; GFX7-NEXT: s_cmp_gt_i32 s1, 0 ; GFX7-NEXT: s_cselect_b32 s1, 1, 0 ; GFX7-NEXT: s_xor_b32 s0, s1, s0 -; GFX7-NEXT: s_and_b32 s0, s0, 1 ; GFX7-NEXT: s_sub_i32 s0, s2, s0 ; GFX7-NEXT: ; return to shader part epilog ; @@ -652,7 +651,6 @@ define amdgpu_ps i32 @s_ssubo_i32(i32 inreg %a, i32 inreg %b) { ; GFX8-NEXT: s_cmp_gt_i32 s1, 0 ; GFX8-NEXT: s_cselect_b32 s1, 1, 0 ; GFX8-NEXT: s_xor_b32 s0, s1, s0 -; GFX8-NEXT: s_and_b32 s0, s0, 1 ; GFX8-NEXT: s_sub_i32 s0, s2, s0 ; GFX8-NEXT: ; return to shader part epilog ; @@ -664,7 +662,6 @@ define amdgpu_ps i32 @s_ssubo_i32(i32 inreg %a, i32 inreg %b) { ; GFX9-NEXT: s_cmp_gt_i32 s1, 0 ; GFX9-NEXT: s_cselect_b32 s1, 1, 0 ; GFX9-NEXT: s_xor_b32 s0, s1, s0 -; GFX9-NEXT: s_and_b32 s0, s0, 1 ; GFX9-NEXT: s_sub_i32 s0, s2, s0 ; GFX9-NEXT: ; return to shader part epilog %ssubo = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) @@ -749,8 +746,6 @@ define amdgpu_ps <2 x i32> @s_ssubo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b ; GFX7-NEXT: s_cselect_b32 s3, 1, 0 ; GFX7-NEXT: s_xor_b32 s0, s2, s0 ; GFX7-NEXT: s_xor_b32 s1, s3, s1 -; GFX7-NEXT: s_and_b32 s0, s0, 1 -; GFX7-NEXT: s_and_b32 s1, s1, 1 ; GFX7-NEXT: s_sub_i32 s0, s4, s0 ; GFX7-NEXT: s_sub_i32 s1, s5, s1 ; GFX7-NEXT: ; return to shader part epilog @@ -769,8 +764,6 @@ define amdgpu_ps <2 x i32> @s_ssubo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b ; GFX8-NEXT: s_cselect_b32 s3, 1, 0 ; GFX8-NEXT: s_xor_b32 s0, s2, s0 ; GFX8-NEXT: s_xor_b32 s1, s3, s1 -; GFX8-NEXT: s_and_b32 s0, s0, 1 -; GFX8-NEXT: s_and_b32 s1, s1, 1 ; GFX8-NEXT: s_sub_i32 s0, s4, s0 ; GFX8-NEXT: s_sub_i32 s1, s5, s1 ; GFX8-NEXT: ; return to shader part epilog @@ -789,8 +782,6 @@ define amdgpu_ps <2 x i32> @s_ssubo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b ; GFX9-NEXT: s_cselect_b32 s3, 1, 0 ; GFX9-NEXT: s_xor_b32 s0, s2, s0 ; GFX9-NEXT: s_xor_b32 s1, s3, s1 -; GFX9-NEXT: s_and_b32 s0, s0, 1 -; GFX9-NEXT: s_and_b32 s1, s1, 1 ; GFX9-NEXT: s_sub_i32 s0, s4, s0 ; GFX9-NEXT: s_sub_i32 s1, s5, s1 ; GFX9-NEXT: ; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll index 77737b356ff6e..c14a7ed5b8c8d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -323,7 +323,6 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: .LBB1_3: ; %Flow ; CHECK-NEXT: s_xor_b32 s1, s4, 1 -; CHECK-NEXT: s_and_b32 s1, s1, 1 ; CHECK-NEXT: s_cmp_lg_u32 s1, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB1_5 ; CHECK-NEXT: ; %bb.4: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll index 097f6642cbc66..6f7ebc8f300c1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -319,7 +319,6 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: .LBB1_3: ; %Flow ; CHECK-NEXT: s_xor_b32 s1, s4, 1 -; CHECK-NEXT: s_and_b32 s1, s1, 1 ; CHECK-NEXT: s_cmp_lg_u32 s1, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB1_5 ; CHECK-NEXT: ; %bb.4: diff --git a/llvm/test/CodeGen/AMDGPU/commute-compares-scalar-float.ll b/llvm/test/CodeGen/AMDGPU/commute-compares-scalar-float.ll index e996fda4c9fd6..7c481310de648 100644 --- a/llvm/test/CodeGen/AMDGPU/commute-compares-scalar-float.ll +++ b/llvm/test/CodeGen/AMDGPU/commute-compares-scalar-float.ll @@ -19,9 +19,8 @@ define amdgpu_vs void @fcmp_f32_olt_to_ogt(ptr addrspace(1) inreg %out, float in ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_gt_f32 s2, 2.0 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -51,9 +50,8 @@ define amdgpu_vs void @fcmp_f32_ogt_to_olt(ptr addrspace(1) inreg %out, float in ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_lt_f32 s2, 2.0 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -83,9 +81,8 @@ define amdgpu_vs void @fcmp_f32_ole_to_oge(ptr addrspace(1) inreg %out, float in ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_ge_f32 s2, 2.0 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -115,9 +112,8 @@ define amdgpu_vs void @fcmp_f32_oge_to_ole(ptr addrspace(1) inreg %out, float in ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_le_f32 s2, 2.0 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -147,9 +143,8 @@ define amdgpu_vs void @fcmp_f32_ult_to_ugt(ptr addrspace(1) inreg %out, float in ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_nle_f32 s2, 2.0 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -179,9 +174,8 @@ define amdgpu_vs void @fcmp_f32_ugt_to_ult(ptr addrspace(1) inreg %out, float in ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_nge_f32 s2, 2.0 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -211,9 +205,8 @@ define amdgpu_vs void @fcmp_f32_ule_to_uge(ptr addrspace(1) inreg %out, float in ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_nlt_f32 s2, 2.0 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -243,9 +236,8 @@ define amdgpu_vs void @fcmp_f32_uge_to_ule(ptr addrspace(1) inreg %out, float in ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_ngt_f32 s2, 2.0 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -275,9 +267,8 @@ define amdgpu_vs void @fcmp_f16_olt_to_ogt(ptr addrspace(1) inreg %out, half inr ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_gt_f16 s2, 0x4000 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -307,9 +298,8 @@ define amdgpu_vs void @fcmp_f16_ogt_to_olt(ptr addrspace(1) inreg %out, half inr ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_lt_f16 s2, 0x4000 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -339,9 +329,8 @@ define amdgpu_vs void @fcmp_f16_ole_to_oge(ptr addrspace(1) inreg %out, half inr ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_ge_f16 s2, 0x4000 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -371,9 +360,8 @@ define amdgpu_vs void @fcmp_f16_oge_to_ole(ptr addrspace(1) inreg %out, half inr ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_le_f16 s2, 0x4000 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -403,9 +391,8 @@ define amdgpu_vs void @fcmp_f16_ult_to_ugt(ptr addrspace(1) inreg %out, half inr ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_nle_f16 s2, 0x4000 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -435,9 +422,8 @@ define amdgpu_vs void @fcmp_f16_ugt_to_ult(ptr addrspace(1) inreg %out, half inr ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_nge_f16 s2, 0x4000 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -467,9 +453,8 @@ define amdgpu_vs void @fcmp_ule_to_uge(ptr addrspace(1) inreg %out, half inreg % ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_nlt_f16 s2, 0x4000 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -499,9 +484,8 @@ define amdgpu_vs void @fcmp_uge_to_ule(ptr addrspace(1) inreg %out, half inreg % ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_ngt_f16 s2, 0x4000 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.ll index 97216b6c94693..dd60749db53f8 100644 --- a/llvm/test/CodeGen/AMDGPU/fptrunc.ll +++ b/llvm/test/CodeGen/AMDGPU/fptrunc.ll @@ -252,7 +252,6 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; VI-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5 ; VI-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 ; VI-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6 -; VI-SAFE-GISEL-NEXT: s_and_b32 s6, s6, 1 ; VI-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6 ; VI-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30 ; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 @@ -381,7 +380,6 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; GFX10-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5 ; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 ; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6 -; GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s6, 1 ; GFX10-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6 ; GFX10-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30 ; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 @@ -525,8 +523,6 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 ; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6 -; GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s6, 1 -; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6 ; GFX11-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30 ; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll index 671ead6127308..a33388b747d64 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -34,8 +34,7 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) { ; GFX7GLISEL-NEXT: s_and_b32 s3, s3, 0x7fff ; GFX7GLISEL-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX7GLISEL-NEXT: s_cmpk_gt_u32 s3, 0x7c00 -; GFX7GLISEL-NEXT: s_cselect_b32 s3, 1, 0 -; GFX7GLISEL-NEXT: s_bfe_i32 s3, s3, 0x10000 +; GFX7GLISEL-NEXT: s_cselect_b32 s3, -1, 0 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, s3 ; GFX7GLISEL-NEXT: s_mov_b32 s3, 0xf000 ; GFX7GLISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 diff --git a/llvm/test/CodeGen/AMDGPU/scalar-float-sopc.ll b/llvm/test/CodeGen/AMDGPU/scalar-float-sopc.ll index 19e50be155a96..f8424b89eb7a6 100644 --- a/llvm/test/CodeGen/AMDGPU/scalar-float-sopc.ll +++ b/llvm/test/CodeGen/AMDGPU/scalar-float-sopc.ll @@ -19,9 +19,8 @@ define amdgpu_vs void @f32_olt(ptr addrspace(1) inreg %out, float inreg %a, floa ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_lt_f32 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -51,9 +50,8 @@ define amdgpu_vs void @f32_oeq(ptr addrspace(1) inreg %out, float inreg %a, floa ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_eq_f32 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -83,9 +81,8 @@ define amdgpu_vs void @f32_ole(ptr addrspace(1) inreg %out, float inreg %a, floa ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_le_f32 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -115,9 +112,8 @@ define amdgpu_vs void @f32_ogt(ptr addrspace(1) inreg %out, float inreg %a, floa ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_gt_f32 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -147,9 +143,8 @@ define amdgpu_vs void @f32_one(ptr addrspace(1) inreg %out, float inreg %a, floa ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_lg_f32 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -179,9 +174,8 @@ define amdgpu_vs void @f32_oge(ptr addrspace(1) inreg %out, float inreg %a, floa ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_ge_f32 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -211,9 +205,8 @@ define amdgpu_vs void @f32_ord(ptr addrspace(1) inreg %out, float inreg %a, floa ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_o_f32 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -243,9 +236,8 @@ define amdgpu_vs void @f32_uno(ptr addrspace(1) inreg %out, float inreg %a, floa ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_u_f32 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -275,9 +267,8 @@ define amdgpu_vs void @f32_ult(ptr addrspace(1) inreg %out, float inreg %a, floa ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_nge_f32 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -307,9 +298,8 @@ define amdgpu_vs void @f32_ueq(ptr addrspace(1) inreg %out, float inreg %a, floa ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_nlg_f32 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -339,9 +329,8 @@ define amdgpu_vs void @f32_ule(ptr addrspace(1) inreg %out, float inreg %a, floa ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_ngt_f32 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -371,9 +360,8 @@ define amdgpu_vs void @f32_ugt(ptr addrspace(1) inreg %out, float inreg %a, floa ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_nle_f32 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -403,9 +391,8 @@ define amdgpu_vs void @f32_une(ptr addrspace(1) inreg %out, float inreg %a, floa ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_neq_f32 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -435,9 +422,8 @@ define amdgpu_vs void @f32_uge(ptr addrspace(1) inreg %out, float inreg %a, floa ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_nlt_f32 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -467,9 +453,8 @@ define amdgpu_vs void @f16_olt(ptr addrspace(1) inreg %out, half inreg %a, half ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_lt_f16 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -499,9 +484,8 @@ define amdgpu_vs void @f16_oeq(ptr addrspace(1) inreg %out, half inreg %a, half ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_eq_f16 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -531,9 +515,8 @@ define amdgpu_vs void @f16_ole(ptr addrspace(1) inreg %out, half inreg %a, half ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_le_f16 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -563,9 +546,8 @@ define amdgpu_vs void @f16_ogt(ptr addrspace(1) inreg %out, half inreg %a, half ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_gt_f16 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -595,9 +577,8 @@ define amdgpu_vs void @f16_one(ptr addrspace(1) inreg %out, half inreg %a, half ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_lg_f16 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -627,9 +608,8 @@ define amdgpu_vs void @f16_oge(ptr addrspace(1) inreg %out, half inreg %a, half ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_ge_f16 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -659,9 +639,8 @@ define amdgpu_vs void @f16_ord(ptr addrspace(1) inreg %out, half inreg %a, half ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_o_f16 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -691,9 +670,8 @@ define amdgpu_vs void @f16_uno(ptr addrspace(1) inreg %out, half inreg %a, half ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_u_f16 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -723,9 +701,8 @@ define amdgpu_vs void @f16_ult(ptr addrspace(1) inreg %out, half inreg %a, half ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_nge_f16 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -755,9 +732,8 @@ define amdgpu_vs void @f16_ueq(ptr addrspace(1) inreg %out, half inreg %a, half ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_nlg_f16 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -787,9 +763,8 @@ define amdgpu_vs void @f16_ule(ptr addrspace(1) inreg %out, half inreg %a, half ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_ngt_f16 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -819,9 +794,8 @@ define amdgpu_vs void @f16_ugt(ptr addrspace(1) inreg %out, half inreg %a, half ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_nle_f16 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -851,9 +825,8 @@ define amdgpu_vs void @f16_une(ptr addrspace(1) inreg %out, half inreg %a, half ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_neq_f16 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0 @@ -883,9 +856,8 @@ define amdgpu_vs void @f16_uge(ptr addrspace(1) inreg %out, half inreg %a, half ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_cmp_nlt_f16 s2, s3 ; GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GISEL-NEXT: s_bfe_i32 s2, s2, 0x10000 +; GISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GISEL-NEXT: s_nop 0